diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 50c7f038fc6b..09afa641acf9 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -1,727 +1,723 @@
//===- LangOptions.h - C Language Family Language Options -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Defines the clang::LangOptions interface.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_BASIC_LANGOPTIONS_H
#define LLVM_CLANG_BASIC_LANGOPTIONS_H

#include "clang/Basic/CommentOptions.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangStandard.h"
#include "clang/Basic/ObjCRuntime.h"
#include "clang/Basic/Sanitizers.h"
#include "clang/Basic/TargetCXXABI.h"
#include "clang/Basic/Visibility.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include <string>
#include <vector>

namespace clang {

/// Bitfields of LangOptions, split out from LangOptions in order to ensure
/// that this large collection of bitfields is a trivial class type.
class LangOptionsBase {
  friend class CompilerInvocation;

public:
  // Define simple language options (with no accessors).
#define LANGOPT(Name, Bits, Default, Description) unsigned Name : Bits;
#define ENUM_LANGOPT(Name, Type, Bits, Default, Description)
#include "clang/Basic/LangOptions.def"

protected:
  // Define language options of enumeration type. These are private, and will
  // have accessors (below).
#define LANGOPT(Name, Bits, Default, Description)
#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
  unsigned Name : Bits;
#include "clang/Basic/LangOptions.def"
};

/// In the Microsoft ABI, this controls the placement of virtual displacement
/// members used to implement virtual inheritance.
enum class MSVtorDispMode { Never, ForVBaseOverride, ForVFTable };

/// Keeps track of the various options that can be
/// enabled, which controls the dialect of C or C++ that is accepted.
class LangOptions : public LangOptionsBase {
public:
  using Visibility = clang::Visibility;
  using RoundingMode = llvm::RoundingMode;

  enum GCMode { NonGC, GCOnly, HybridGC };
  enum StackProtectorMode { SSPOff, SSPOn, SSPStrong, SSPReq };

  // Automatic variables live on the stack, and when trivial they're usually
  // uninitialized because it's undefined behavior to use them without
  // initializing them.
  enum class TrivialAutoVarInitKind { Uninitialized, Zero, Pattern };

  enum SignedOverflowBehaviorTy {
    // Default C standard behavior.
    SOB_Undefined,
    // -fwrapv
    SOB_Defined,
    // -ftrapv
    SOB_Trapping
  };

  // FIXME: Unify with TUKind.
  enum CompilingModuleKind {
    /// Not compiling a module interface at all.
    CMK_None,
    /// Compiling a module from a module map.
    CMK_ModuleMap,
    /// Compiling a module from a list of header files.
    CMK_HeaderModule,
    /// Compiling a C++ modules TS module interface unit.
    CMK_ModuleInterface,
  };

  enum PragmaMSPointersToMembersKind {
    PPTMK_BestCase,
    PPTMK_FullGeneralitySingleInheritance,
    PPTMK_FullGeneralityMultipleInheritance,
    PPTMK_FullGeneralityVirtualInheritance
  };

  using MSVtorDispMode = clang::MSVtorDispMode;

  enum DefaultCallingConvention {
    DCC_None,
    DCC_CDecl,
    DCC_FastCall,
    DCC_StdCall,
    DCC_VectorCall,
    DCC_RegCall
  };

  enum AddrSpaceMapMangling { ASMM_Target, ASMM_On, ASMM_Off };

  // Corresponds to _MSC_VER
  enum MSVCMajorVersion {
    MSVC2010 = 1600,
    MSVC2012 = 1700,
    MSVC2013 = 1800,
    MSVC2015 = 1900,
    MSVC2017 = 1910,
    MSVC2017_5 = 1912,
    MSVC2017_7 = 1914,
    MSVC2019 = 1920,
    MSVC2019_5 = 1925,
    MSVC2019_8 = 1928,
  };
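  // A worked example of how these values are consumed (version illustrative,
  // and assuming the driver's usual major*10000000 + minor*100000 + subminor
  // scaling): -fms-compatibility-version=19.10.25017 is encoded as
  //   MSCompatibilityVersion = 19 * 10000000 + 10 * 100000 + 25017
  //                          = 191025017,
  // so isCompatibleWithMSVC(MSVC2017) below evaluates
  //   191025017 >= 1910 * 100000 (= 191000000)  ->  true.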
  enum SYCLMajorVersion {
    SYCL_None,
    SYCL_2017,
    SYCL_2020,
    // The "default" SYCL version to be used when none is specified on the
    // frontend command line.
    SYCL_Default = SYCL_2020
  };

  /// Clang versions with different platform ABI conformance.
  enum class ClangABI {
    /// Attempt to be ABI-compatible with code generated by Clang 3.8.x
    /// (SVN r257626). This causes <1 x long long> to be passed in an
    /// integer register instead of an SSE register on x86_64.
    Ver3_8,

    /// Attempt to be ABI-compatible with code generated by Clang 4.0.x
    /// (SVN r291814). This causes move operations to be ignored when
    /// determining whether a class type can be passed or returned directly.
    Ver4,

    /// Attempt to be ABI-compatible with code generated by Clang 6.0.x
    /// (SVN r321711). This causes determination of whether a type is
    /// standard-layout to ignore collisions between empty base classes
    /// and between base classes and member subobjects, which affects
    /// whether we reuse base class tail padding in some ABIs.
    Ver6,

    /// Attempt to be ABI-compatible with code generated by Clang 7.0.x
    /// (SVN r338536). This causes alignof (C++) and _Alignof (C11) to be
    /// compatible with __alignof (i.e., return the preferred alignment)
    /// rather than returning the required alignment.
    Ver7,

    /// Attempt to be ABI-compatible with code generated by Clang 9.0.x
    /// (SVN r351319). This causes vectors of __int128 to be passed in memory
    /// instead of passing in multiple scalar registers on x86_64 on Linux and
    /// NetBSD.
    Ver9,

    /// Attempt to be ABI-compatible with code generated by Clang 11.0.x
    /// (git 2e10b7a39b93). This causes clang to pass unions with a 256-bit
    /// vector member on the stack instead of using registers, to not properly
    /// mangle substitutions for template names in some cases, and to mangle
    /// declaration template arguments without a cast to the parameter type
    /// even when that can lead to mangling collisions.
    Ver11,

    /// Attempt to be ABI-compatible with code generated by Clang 12.0.x
    /// (git 8e464dd76bef). This causes clang to mangle lambdas within
    /// global-scope inline variables incorrectly.
    Ver12,

-    /// Attempt to be ABI-compatible with code generated by Clang 13.0.x.
-    /// This causes clang to not pack non-POD members of packed structs.
-    Ver13,
-
    /// Conform to the underlying platform's C and C++ ABIs as closely
    /// as we can.
    Latest
  };

  enum class CoreFoundationABI {
    /// No interoperability ABI has been specified
    Unspecified,
    /// CoreFoundation does not have any language interoperability
    Standalone,
    /// Interoperability with the ObjectiveC runtime
    ObjectiveC,
    /// Interoperability with the latest known version of the Swift runtime
    Swift,
    /// Interoperability with the Swift 5.0 runtime
    Swift5_0,
    /// Interoperability with the Swift 4.2 runtime
    Swift4_2,
    /// Interoperability with the Swift 4.1 runtime
    Swift4_1,
  };

  enum FPModeKind {
    // Disable the floating point pragma
    FPM_Off,
    // Enable the floating point pragma
    FPM_On,
    // Aggressively fuse FP ops (E.g. FMA) disregarding pragmas.
    FPM_Fast,
    // Aggressively fuse FP ops and honor pragmas.
    FPM_FastHonorPragmas
  };

  /// Alias for RoundingMode::NearestTiesToEven.
  static constexpr unsigned FPR_ToNearest =
      static_cast<unsigned>(llvm::RoundingMode::NearestTiesToEven);

  /// Possible floating point exception behavior.
  enum FPExceptionModeKind {
    /// Assume that floating-point exceptions are masked.
    FPE_Ignore,
    /// Transformations do not cause new exceptions but may hide some.
    FPE_MayTrap,
    /// Strictly preserve the floating-point exception semantics.
    FPE_Strict
  };

  /// Possible exception handling behavior.
  enum class ExceptionHandlingKind { None, SjLj, WinEH, DwarfCFI, Wasm };

  enum class LaxVectorConversionKind {
    /// Permit no implicit vector bitcasts.
    None,
    /// Permit vector bitcasts between integer vectors with different numbers
    /// of elements but the same total bit-width.
    Integer,
    /// Permit vector bitcasts between all vectors with the same total
    /// bit-width.
    All,
  };

  enum class AltivecSrcCompatKind {
    // All vector compares produce scalars except vector pixel and vector bool.
    // The types vector pixel and vector bool return vector results.
    Mixed,
    // All vector compares produce vector results as in GCC.
    GCC,
    // All vector compares produce scalars as in XL.
    XL,
    // Default clang behaviour.
    Default = Mixed,
  };

  enum class SignReturnAddressScopeKind {
    /// No signing for any function.
    None,
    /// Sign the return address of functions that spill LR.
    NonLeaf,
    /// Sign the return address of all functions.
    All
  };

  enum class SignReturnAddressKeyKind {
    /// Return address signing uses APIA key.
    AKey,
    /// Return address signing uses APIB key.
    BKey
  };

  enum class ThreadModelKind {
    /// POSIX Threads.
    POSIX,
    /// Single Threaded Environment.
    Single
  };

  enum class ExtendArgsKind {
    /// Integer arguments are sign or zero extended to 32/64 bits
    /// during default argument promotions.
    ExtendTo32,
    ExtendTo64
  };

public:
  /// The used language standard.
  LangStandard::Kind LangStd;

  /// Set of enabled sanitizers.
  SanitizerSet Sanitize;
  /// Is at least one coverage instrumentation type enabled.
  bool SanitizeCoverage = false;

  /// Paths to files specifying which objects
  /// (files, functions, variables) should not be instrumented.
  std::vector<std::string> NoSanitizeFiles;

  /// Paths to the XRay "always instrument" files specifying which
  /// objects (files, functions, variables) should be imbued with the XRay
  /// "always instrument" attribute.
  /// WARNING: This is a deprecated field and will go away in the future.
  std::vector<std::string> XRayAlwaysInstrumentFiles;

  /// Paths to the XRay "never instrument" files specifying which
  /// objects (files, functions, variables) should be imbued with the XRay
  /// "never instrument" attribute.
  /// WARNING: This is a deprecated field and will go away in the future.
  std::vector<std::string> XRayNeverInstrumentFiles;

  /// Paths to the XRay attribute list files, specifying which objects
  /// (files, functions, variables) should be imbued with the appropriate XRay
  /// attribute(s).
  std::vector<std::string> XRayAttrListFiles;

  /// Paths to special case list files specifying which entities
  /// (files, functions) should or should not be instrumented.
  std::vector<std::string> ProfileListFiles;

  clang::ObjCRuntime ObjCRuntime;

  CoreFoundationABI CFRuntime = CoreFoundationABI::Unspecified;

  std::string ObjCConstantStringClass;

  /// The name of the handler function to be called when -ftrapv is
  /// specified.
  ///
  /// If none is specified, abort (GCC-compatible behaviour).
  std::string OverflowHandler;

  /// The module currently being compiled as specified by -fmodule-name.
  std::string ModuleName;

  /// The name of the current module, of which the main source file
  /// is a part. If CompilingModule is set, we are compiling the interface
  /// of this module, otherwise we are compiling an implementation file of
  /// it. This starts as ModuleName in case -fmodule-name is provided and
  /// changes during compilation to reflect the current module.
  std::string CurrentModule;

  /// The names of any features to enable in module 'requires' decls
  /// in addition to the hard-coded list in Module.cpp and the target features.
  ///
  /// This list is sorted.
  std::vector<std::string> ModuleFeatures;

  /// Options for parsing comments.
  CommentOptions CommentOpts;

  /// A list of all -fno-builtin-* function names (e.g., memset).
  std::vector<std::string> NoBuiltinFuncs;

  /// A prefix map for __FILE__, __BASE_FILE__ and __builtin_FILE().
  std::map<std::string, std::string, std::greater<std::string>> MacroPrefixMap;

  /// Triples of the OpenMP targets that the host code codegen should
  /// take into account in order to generate accurate offloading descriptors.
  std::vector<llvm::Triple> OMPTargetTriples;

  /// Name of the IR file that contains the result of the OpenMP target
  /// host code generation.
  std::string OMPHostIRFile;

  /// The user provided compilation unit ID, if non-empty. This is used to
  /// externalize static variables which is needed to support accessing static
  /// device variables in host code for single source offloading languages
  /// like CUDA/HIP.
  std::string CUID;

  /// C++ ABI to compile with, if specified by the frontend through -fc++-abi=.
  /// This overrides the default ABI used by the target.
  llvm::Optional<TargetCXXABI::Kind> CXXABI;

  /// Indicates whether the front-end is explicitly told that the
  /// input is a header file (i.e. -x c-header).
  bool IsHeaderFile = false;

  LangOptions();

  // Define accessors/mutators for language options of enumeration type.
#define LANGOPT(Name, Bits, Default, Description)
#define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \
  Type get##Name() const { return static_cast<Type>(Name); } \
  void set##Name(Type Value) { Name = static_cast<unsigned>(Value); }
#include "clang/Basic/LangOptions.def"
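  // For illustration, a single .def entry such as
  //   ENUM_LANGOPT(SignedOverflowBehavior, SignedOverflowBehaviorTy, 2,
  //                SOB_Undefined, ...)
  // expands here to a getter/setter pair (sketch; see LangOptions.def for the
  // real entry and its description string):
  //   SignedOverflowBehaviorTy getSignedOverflowBehavior() const {
  //     return static_cast<SignedOverflowBehaviorTy>(SignedOverflowBehavior);
  //   }
  //   void setSignedOverflowBehavior(SignedOverflowBehaviorTy Value) {
  //     SignedOverflowBehavior = static_cast<unsigned>(Value);
  //   }
  // which is what lets isSignedOverflowDefined() below call the getter.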
  /// Are we compiling a module interface (.cppm or module map)?
  bool isCompilingModule() const { return getCompilingModule() != CMK_None; }

  /// Do we need to track the owning module for a local declaration?
  bool trackLocalOwningModule() const {
    return isCompilingModule() || ModulesLocalVisibility;
  }

  bool isSignedOverflowDefined() const {
    return getSignedOverflowBehavior() == SOB_Defined;
  }

  bool isSubscriptPointerArithmetic() const {
    return ObjCRuntime.isSubscriptPointerArithmetic() &&
           !ObjCSubscriptingLegacyRuntime;
  }

  bool isCompatibleWithMSVC(MSVCMajorVersion MajorVersion) const {
    return MSCompatibilityVersion >= MajorVersion * 100000U;
  }

  /// Reset all of the options that are not considered when building a
  /// module.
  void resetNonModularOptions();

  /// Is this a libc/libm function that is no longer recognized as a
  /// builtin because a -fno-builtin-* option has been specified?
  bool isNoBuiltinFunc(StringRef Name) const;

  /// True if any ObjC types may have non-trivial lifetime qualifiers.
  bool allowsNonTrivialObjCLifetimeQualifiers() const {
    return ObjCAutoRefCount || ObjCWeak;
  }

  bool assumeFunctionsAreConvergent() const { return ConvergentFunctions; }

  /// Return the OpenCL C or C++ version as a VersionTuple.
  VersionTuple getOpenCLVersionTuple() const;

  /// Return the OpenCL version that the kernel language is compatible with.
  unsigned getOpenCLCompatibleVersion() const;

  /// Return the OpenCL C or C++ for OpenCL language name and version
  /// as a string.
  std::string getOpenCLVersionString() const;

  /// Check if return address signing is enabled.
  bool hasSignReturnAddress() const {
    return getSignReturnAddressScope() != SignReturnAddressScopeKind::None;
  }

  /// Check if return address signing uses AKey.
  bool isSignReturnAddressWithAKey() const {
    return getSignReturnAddressKey() == SignReturnAddressKeyKind::AKey;
  }

  /// Check if leaf functions are also signed.
  bool isSignReturnAddressScopeAll() const {
    return getSignReturnAddressScope() == SignReturnAddressScopeKind::All;
  }

  bool hasSjLjExceptions() const {
    return getExceptionHandling() == ExceptionHandlingKind::SjLj;
  }

  bool hasSEHExceptions() const {
    return getExceptionHandling() == ExceptionHandlingKind::WinEH;
  }

  bool hasDWARFExceptions() const {
    return getExceptionHandling() == ExceptionHandlingKind::DwarfCFI;
  }

  bool hasWasmExceptions() const {
    return getExceptionHandling() == ExceptionHandlingKind::Wasm;
  }

  bool isSYCL() const { return SYCLIsDevice || SYCLIsHost; }

  /// Remap path prefix according to the -fmacro-prefix-map option.
  void remapPathPrefix(SmallString<256> &Path) const;
};

/// Floating point control options
class FPOptionsOverride;
class FPOptions {
public:
  // We start by defining the layout.
  using storage_type = uint16_t;

  using RoundingMode = llvm::RoundingMode;

  static constexpr unsigned StorageBitSize = 8 * sizeof(storage_type);

  // Define a fake option named "First" so that we have a PREVIOUS even for the
  // real first option.
  static constexpr storage_type FirstShift = 0, FirstWidth = 0;
#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \
  static constexpr storage_type NAME##Shift = \
      PREVIOUS##Shift + PREVIOUS##Width; \
  static constexpr storage_type NAME##Width = WIDTH; \
  static constexpr storage_type NAME##Mask = ((1 << NAME##Width) - 1) \
                                             << NAME##Shift;
#include "clang/Basic/FPOptions.def"

  static constexpr storage_type TotalWidth = 0
#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) +WIDTH
#include "clang/Basic/FPOptions.def"
      ;
  static_assert(TotalWidth <= StorageBitSize, "Too short type for FPOptions");
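  // Illustrative expansion of the chaining trick above, assuming
  // FPOptions.def opens with
  //   OPTION(FPContractMode, LangOptions::FPModeKind, 2, First)
  // the macro then produces:
  //   FPContractModeShift = FirstShift + FirstWidth = 0
  //   FPContractModeWidth = 2
  //   FPContractModeMask  = ((1 << 2) - 1) << 0 = 0b11
  // and the next OPTION names FPContractMode as its PREVIOUS, so its field
  // starts at bit 2; every option gets a disjoint bit range this way.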
private:
  storage_type Value;

public:
  FPOptions() : Value(0) {
    setFPContractMode(LangOptions::FPM_Off);
    setRoundingMode(static_cast<RoundingMode>(LangOptions::FPR_ToNearest));
    setFPExceptionMode(LangOptions::FPE_Ignore);
  }
  explicit FPOptions(const LangOptions &LO) {
    Value = 0;
    // The language fp contract option FPM_FastHonorPragmas has the same effect
    // as FPM_Fast in frontend. For simplicity, use FPM_Fast uniformly in
    // frontend.
    auto LangOptContractMode = LO.getDefaultFPContractMode();
    if (LangOptContractMode == LangOptions::FPM_FastHonorPragmas)
      LangOptContractMode = LangOptions::FPM_Fast;
    setFPContractMode(LangOptContractMode);
    setRoundingMode(LO.getFPRoundingMode());
    setFPExceptionMode(LO.getFPExceptionMode());
    setAllowFPReassociate(LO.AllowFPReassoc);
    setNoHonorNaNs(LO.NoHonorNaNs);
    setNoHonorInfs(LO.NoHonorInfs);
    setNoSignedZero(LO.NoSignedZero);
    setAllowReciprocal(LO.AllowRecip);
    setAllowApproxFunc(LO.ApproxFunc);
    if (getFPContractMode() == LangOptions::FPM_On &&
        getRoundingMode() == llvm::RoundingMode::Dynamic &&
        getFPExceptionMode() == LangOptions::FPE_Strict)
      // If the FP settings are set to the "strict" model, then
      // FENV access is set to true. (ffp-model=strict)
      setAllowFEnvAccess(true);
    else
      setAllowFEnvAccess(false);
  }

  bool allowFPContractWithinStatement() const {
    return getFPContractMode() == LangOptions::FPM_On;
  }
  void setAllowFPContractWithinStatement() {
    setFPContractMode(LangOptions::FPM_On);
  }

  bool allowFPContractAcrossStatement() const {
    return getFPContractMode() == LangOptions::FPM_Fast;
  }
  void setAllowFPContractAcrossStatement() {
    setFPContractMode(LangOptions::FPM_Fast);
  }

  bool isFPConstrained() const {
    return getRoundingMode() != llvm::RoundingMode::NearestTiesToEven ||
           getFPExceptionMode() != LangOptions::FPE_Ignore ||
           getAllowFEnvAccess();
  }

  bool operator==(FPOptions other) const { return Value == other.Value; }

  /// Return the default value of FPOptions that's used when trailing
  /// storage isn't required.
  static FPOptions defaultWithoutTrailingStorage(const LangOptions &LO);

  storage_type getAsOpaqueInt() const { return Value; }
  static FPOptions getFromOpaqueInt(storage_type Value) {
    FPOptions Opts;
    Opts.Value = Value;
    return Opts;
  }

  // We can define most of the accessors automatically:
#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \
  TYPE get##NAME() const { \
    return static_cast<TYPE>((Value & NAME##Mask) >> NAME##Shift); \
  } \
  void set##NAME(TYPE value) { \
    Value = (Value & ~NAME##Mask) | (storage_type(value) << NAME##Shift); \
  }
#include "clang/Basic/FPOptions.def"

  LLVM_DUMP_METHOD void dump();
};

/// Represents the difference between two FPOptions values.
///
/// The effect of language constructs changing the set of floating point
/// options is usually a change of some FP properties while leaving others
/// intact. This class describes such changes by keeping information about
/// what FP options are overridden.
///
/// The integral set of FP options, described by the class FPOptions, may be
/// represented as a default FP option set, defined by language standard and
/// command line options, with the overrides introduced by pragmas.
///
/// This is implemented as a value of the new FPOptions plus a mask showing
/// which fields are actually set in it.
class FPOptionsOverride {
  FPOptions Options = FPOptions::getFromOpaqueInt(0);
  FPOptions::storage_type OverrideMask = 0;

public:
  using RoundingMode = llvm::RoundingMode;

  /// The type suitable for storing values of FPOptionsOverride. Must be twice
  /// as wide as the bit size of FPOptions.
  using storage_type = uint32_t;

  static_assert(sizeof(storage_type) >= 2 * sizeof(FPOptions::storage_type),
                "Too short type for FPOptionsOverride");

  /// Bit mask selecting bits of OverrideMask in serialized representation of
  /// FPOptionsOverride.
  static constexpr storage_type OverrideMaskBits =
      (static_cast<storage_type>(1) << FPOptions::StorageBitSize) - 1;

  FPOptionsOverride() {}
  FPOptionsOverride(const LangOptions &LO)
      : Options(LO), OverrideMask(OverrideMaskBits) {}
  FPOptionsOverride(FPOptions FPO)
      : Options(FPO), OverrideMask(OverrideMaskBits) {}

  bool requiresTrailingStorage() const { return OverrideMask != 0; }

  void setAllowFPContractWithinStatement() {
    setFPContractModeOverride(LangOptions::FPM_On);
  }

  void setAllowFPContractAcrossStatement() {
    setFPContractModeOverride(LangOptions::FPM_Fast);
  }

  void setDisallowFPContract() {
    setFPContractModeOverride(LangOptions::FPM_Off);
  }

  void setFPPreciseEnabled(bool Value) {
    setAllowFPReassociateOverride(!Value);
    setNoHonorNaNsOverride(!Value);
    setNoHonorInfsOverride(!Value);
    setNoSignedZeroOverride(!Value);
    setAllowReciprocalOverride(!Value);
    setAllowApproxFuncOverride(!Value);
    if (Value)
      /* Precise mode implies fp_contract=on and disables ffast-math */
      setAllowFPContractWithinStatement();
    else
      /* Precise mode disabled sets fp_contract=fast and enables ffast-math */
      setAllowFPContractAcrossStatement();
  }

  storage_type getAsOpaqueInt() const {
    return (static_cast<storage_type>(Options.getAsOpaqueInt())
            << FPOptions::StorageBitSize) |
           OverrideMask;
  }
  static FPOptionsOverride getFromOpaqueInt(storage_type I) {
    FPOptionsOverride Opts;
    Opts.OverrideMask = I & OverrideMaskBits;
    Opts.Options = FPOptions::getFromOpaqueInt(I >> FPOptions::StorageBitSize);
    return Opts;
  }

  FPOptions applyOverrides(FPOptions Base) {
    FPOptions Result =
        FPOptions::getFromOpaqueInt((Base.getAsOpaqueInt() & ~OverrideMask) |
                                    (Options.getAsOpaqueInt() & OverrideMask));
    return Result;
  }

  FPOptions applyOverrides(const LangOptions &LO) {
    return applyOverrides(FPOptions(LO));
  }

  bool operator==(FPOptionsOverride other) const {
    return Options == other.Options && OverrideMask == other.OverrideMask;
  }
  bool operator!=(FPOptionsOverride other) const { return !(*this == other); }

#define OPTION(NAME, TYPE, WIDTH, PREVIOUS) \
  bool has##NAME##Override() const { \
    return OverrideMask & FPOptions::NAME##Mask; \
  } \
  TYPE get##NAME##Override() const { \
    assert(has##NAME##Override()); \
    return Options.get##NAME(); \
  } \
  void clear##NAME##Override() { \
    /* Clear the actual value so that we don't have spurious differences when \
     * testing equality. */ \
    Options.set##NAME(TYPE(0)); \
    OverrideMask &= ~FPOptions::NAME##Mask; \
  } \
  void set##NAME##Override(TYPE value) { \
    Options.set##NAME(value); \
    OverrideMask |= FPOptions::NAME##Mask; \
  }
#include "clang/Basic/FPOptions.def"

  LLVM_DUMP_METHOD void dump();
};
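// A small worked example of the override machinery, using hypothetical 4-bit
// values: if Base.getAsOpaqueInt() == 0b0110, OverrideMask == 0b0011 and
// Options.getAsOpaqueInt() == 0b0001, then applyOverrides computes
//   (0b0110 & ~0b0011) | (0b0001 & 0b0011) == 0b0100 | 0b0001 == 0b0101,
// i.e. only the masked (overridden) bits are taken from Options. The
// serialized form getAsOpaqueInt() keeps the two halves disjoint in the same
// spirit: Options in the high 16 bits, OverrideMask in the low 16.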
/// Describes the kind of translation unit being processed.
enum TranslationUnitKind {
  /// The translation unit is a complete translation unit.
  TU_Complete,

  /// The translation unit is a prefix to a translation unit, and is
  /// not complete.
  TU_Prefix,

  /// The translation unit is a module.
  TU_Module,

  /// The translation unit is a complete translation unit that we might
  /// incrementally extend later.
  TU_Incremental
};

} // namespace clang

#endif // LLVM_CLANG_BASIC_LANGOPTIONS_H
diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index 709e05716a56..61a30ead165e 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -1,3716 +1,3711 @@
//=== RecordLayoutBuilder.cpp - Helper class for building record layouts ---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTDiagnostic.h"
#include "clang/AST/Attr.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/Expr.h"
#include "clang/AST/VTableBuilder.h"
#include "clang/AST/RecordLayout.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"

using namespace clang;

namespace {

/// BaseSubobjectInfo - Represents a single base subobject in a complete class.
/// For a class hierarchy like
///
/// class A { };
/// class B : A { };
/// class C : A, B { };
///
/// The BaseSubobjectInfo graph for C will have three BaseSubobjectInfo
/// instances, one for B and two for A.
///
/// If a base is virtual, it will only have one BaseSubobjectInfo allocated.
struct BaseSubobjectInfo {
  /// Class - The class for this base info.
  const CXXRecordDecl *Class;

  /// IsVirtual - Whether the BaseInfo represents a virtual base or not.
  bool IsVirtual;

  /// Bases - Information about the base subobjects.
  SmallVector<BaseSubobjectInfo *, 4> Bases;

  /// PrimaryVirtualBaseInfo - Holds the base info for the primary virtual base
  /// of this base info (if one exists).
  BaseSubobjectInfo *PrimaryVirtualBaseInfo;

  // FIXME: Document.
  const BaseSubobjectInfo *Derived;
};

/// Externally provided layout. Typically used when the AST source, such
/// as DWARF, lacks all the information that was available at compile time,
/// such as alignment attributes on fields and pragmas in effect.
struct ExternalLayout {
  ExternalLayout() : Size(0), Align(0) {}

  /// Overall record size in bits.
  uint64_t Size;

  /// Overall record alignment in bits.
  uint64_t Align;

  /// Record field offsets in bits.
  llvm::DenseMap<const FieldDecl *, uint64_t> FieldOffsets;

  /// Direct, non-virtual base offsets.
  llvm::DenseMap<const CXXRecordDecl *, CharUnits> BaseOffsets;

  /// Virtual base offsets.
  llvm::DenseMap<const CXXRecordDecl *, CharUnits> VirtualBaseOffsets;

  /// Get the offset of the given field. The external source must provide
  /// entries for all fields in the record.
  uint64_t getExternalFieldOffset(const FieldDecl *FD) {
    assert(FieldOffsets.count(FD) &&
           "Field does not have an external offset");
    return FieldOffsets[FD];
  }

  bool getExternalNVBaseOffset(const CXXRecordDecl *RD, CharUnits &BaseOffset) {
    auto Known = BaseOffsets.find(RD);
    if (Known == BaseOffsets.end())
      return false;
    BaseOffset = Known->second;
    return true;
  }

  bool getExternalVBaseOffset(const CXXRecordDecl *RD, CharUnits &BaseOffset) {
    auto Known = VirtualBaseOffsets.find(RD);
    if (Known == VirtualBaseOffsets.end())
      return false;
    BaseOffset = Known->second;
    return true;
  }
};

/// EmptySubobjectMap - Keeps track of which empty subobjects exist at
/// different offsets while laying out a C++ class.
class EmptySubobjectMap {
  const ASTContext &Context;
  uint64_t CharWidth;

  /// Class - The class whose empty entries we're keeping track of.
  const CXXRecordDecl *Class;

  /// EmptyClassOffsets - A map from offsets to empty record decls.
  typedef llvm::TinyPtrVector<const CXXRecordDecl *> ClassVectorTy;
  typedef llvm::DenseMap<CharUnits, ClassVectorTy> EmptyClassOffsetsMapTy;
  EmptyClassOffsetsMapTy EmptyClassOffsets;

  /// MaxEmptyClassOffset - The highest offset known to contain an empty
  /// base subobject.
  CharUnits MaxEmptyClassOffset;

  /// ComputeEmptySubobjectSizes - Compute the size of the largest base or
  /// member subobject that is empty.
  void ComputeEmptySubobjectSizes();

  void AddSubobjectAtOffset(const CXXRecordDecl *RD, CharUnits Offset);

  void UpdateEmptyBaseSubobjects(const BaseSubobjectInfo *Info,
                                 CharUnits Offset, bool PlacingEmptyBase);

  void UpdateEmptyFieldSubobjects(const CXXRecordDecl *RD,
                                  const CXXRecordDecl *Class, CharUnits Offset,
                                  bool PlacingOverlappingField);
  void UpdateEmptyFieldSubobjects(const FieldDecl *FD, CharUnits Offset,
                                  bool PlacingOverlappingField);

  /// AnyEmptySubobjectsBeyondOffset - Returns whether there are any empty
  /// subobjects beyond the given offset.
  bool AnyEmptySubobjectsBeyondOffset(CharUnits Offset) const {
    return Offset <= MaxEmptyClassOffset;
  }

  CharUnits getFieldOffset(const ASTRecordLayout &Layout,
                           unsigned FieldNo) const {
    uint64_t FieldOffset = Layout.getFieldOffset(FieldNo);
    assert(FieldOffset % CharWidth == 0 &&
           "Field offset not at char boundary!");
    return Context.toCharUnitsFromBits(FieldOffset);
  }

protected:
  bool CanPlaceSubobjectAtOffset(const CXXRecordDecl *RD,
                                 CharUnits Offset) const;

  bool CanPlaceBaseSubobjectAtOffset(const BaseSubobjectInfo *Info,
                                     CharUnits Offset);

  bool CanPlaceFieldSubobjectAtOffset(const CXXRecordDecl *RD,
                                      const CXXRecordDecl *Class,
                                      CharUnits Offset) const;
  bool CanPlaceFieldSubobjectAtOffset(const FieldDecl *FD,
                                      CharUnits Offset) const;

public:
  /// This holds the size of the largest empty subobject (either a base
  /// or a member). Will be zero if the record being built doesn't contain
  /// any empty classes.
  CharUnits SizeOfLargestEmptySubobject;

  EmptySubobjectMap(const ASTContext &Context, const CXXRecordDecl *Class)
      : Context(Context), CharWidth(Context.getCharWidth()), Class(Class) {
    ComputeEmptySubobjectSizes();
  }

  /// CanPlaceBaseAtOffset - Return whether the given base class can be placed
  /// at the given offset.
  /// Returns false if placing the record will result in two components
  /// (direct or indirect) of the same type having the same offset.
  bool CanPlaceBaseAtOffset(const BaseSubobjectInfo *Info, CharUnits Offset);

  /// CanPlaceFieldAtOffset - Return whether a field can be placed at the given
  /// offset.
  bool CanPlaceFieldAtOffset(const FieldDecl *FD, CharUnits Offset);
};
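// What this map prevents, in miniature (behavior per the Itanium ABI):
//
//   struct Empty {};
//   struct Derived : Empty { Empty E; };
//
// The Empty base wants offset 0, and the field E has the same type, so two
// distinct Empty subobjects would otherwise share an address; the builder
// instead pushes E to offset 1, making sizeof(Derived) == 2.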
void EmptySubobjectMap::ComputeEmptySubobjectSizes() {
  // Check the bases.
  for (const CXXBaseSpecifier &Base : Class->bases()) {
    const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();

    CharUnits EmptySize;
    const ASTRecordLayout &Layout = Context.getASTRecordLayout(BaseDecl);
    if (BaseDecl->isEmpty()) {
      // If the class decl is empty, get its size.
      EmptySize = Layout.getSize();
    } else {
      // Otherwise, we get the largest empty subobject for the decl.
      EmptySize = Layout.getSizeOfLargestEmptySubobject();
    }

    if (EmptySize > SizeOfLargestEmptySubobject)
      SizeOfLargestEmptySubobject = EmptySize;
  }

  // Check the fields.
  for (const FieldDecl *FD : Class->fields()) {
    const RecordType *RT =
        Context.getBaseElementType(FD->getType())->getAs<RecordType>();

    // We only care about record types.
    if (!RT)
      continue;

    CharUnits EmptySize;
    const CXXRecordDecl *MemberDecl = RT->getAsCXXRecordDecl();
    const ASTRecordLayout &Layout = Context.getASTRecordLayout(MemberDecl);
    if (MemberDecl->isEmpty()) {
      // If the class decl is empty, get its size.
      EmptySize = Layout.getSize();
    } else {
      // Otherwise, we get the largest empty subobject for the decl.
      EmptySize = Layout.getSizeOfLargestEmptySubobject();
    }

    if (EmptySize > SizeOfLargestEmptySubobject)
      SizeOfLargestEmptySubobject = EmptySize;
  }
}

bool EmptySubobjectMap::CanPlaceSubobjectAtOffset(const CXXRecordDecl *RD,
                                                  CharUnits Offset) const {
  // We only need to check empty bases.
  if (!RD->isEmpty())
    return true;

  EmptyClassOffsetsMapTy::const_iterator I = EmptyClassOffsets.find(Offset);
  if (I == EmptyClassOffsets.end())
    return true;

  const ClassVectorTy &Classes = I->second;
  if (!llvm::is_contained(Classes, RD))
    return true;

  // There is already an empty class of the same type at this offset.
  return false;
}

void EmptySubobjectMap::AddSubobjectAtOffset(const CXXRecordDecl *RD,
                                             CharUnits Offset) {
  // We only care about empty bases.
  if (!RD->isEmpty())
    return;

  // If we have empty structures inside a union, we can assign both
  // the same offset. Just avoid pushing them twice in the list.
  ClassVectorTy &Classes = EmptyClassOffsets[Offset];
  if (llvm::is_contained(Classes, RD))
    return;

  Classes.push_back(RD);

  // Update the empty class offset.
  if (Offset > MaxEmptyClassOffset)
    MaxEmptyClassOffset = Offset;
}

bool
EmptySubobjectMap::CanPlaceBaseSubobjectAtOffset(const BaseSubobjectInfo *Info,
                                                 CharUnits Offset) {
  // We don't have to keep looking past the maximum offset that's known to
  // contain an empty class.
  if (!AnyEmptySubobjectsBeyondOffset(Offset))
    return true;

  if (!CanPlaceSubobjectAtOffset(Info->Class, Offset))
    return false;

  // Traverse all non-virtual bases.
  const ASTRecordLayout &Layout = Context.getASTRecordLayout(Info->Class);
  for (const BaseSubobjectInfo *Base : Info->Bases) {
    if (Base->IsVirtual)
      continue;

    CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(Base->Class);
    if (!CanPlaceBaseSubobjectAtOffset(Base, BaseOffset))
      return false;
  }

  if (Info->PrimaryVirtualBaseInfo) {
    BaseSubobjectInfo *PrimaryVirtualBaseInfo = Info->PrimaryVirtualBaseInfo;

    if (Info == PrimaryVirtualBaseInfo->Derived) {
      if (!CanPlaceBaseSubobjectAtOffset(PrimaryVirtualBaseInfo, Offset))
        return false;
    }
  }

  // Traverse all member variables.
  unsigned FieldNo = 0;
  for (CXXRecordDecl::field_iterator I = Info->Class->field_begin(),
       E = Info->Class->field_end(); I != E; ++I, ++FieldNo) {
    if (I->isBitField())
      continue;

    CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo);
    if (!CanPlaceFieldSubobjectAtOffset(*I, FieldOffset))
      return false;
  }

  return true;
}

void EmptySubobjectMap::UpdateEmptyBaseSubobjects(const BaseSubobjectInfo *Info,
                                                  CharUnits Offset,
                                                  bool PlacingEmptyBase) {
  if (!PlacingEmptyBase && Offset >= SizeOfLargestEmptySubobject) {
    // We know that the only empty subobjects that can conflict with empty
    // subobjects of non-empty bases are empty bases that can be placed at
    // offset zero. Because of this, we only need to keep track of empty base
    // subobjects with offsets less than the size of the largest empty
    // subobject for our class.
    return;
  }

  AddSubobjectAtOffset(Info->Class, Offset);

  // Traverse all non-virtual bases.
  const ASTRecordLayout &Layout = Context.getASTRecordLayout(Info->Class);
  for (const BaseSubobjectInfo *Base : Info->Bases) {
    if (Base->IsVirtual)
      continue;

    CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(Base->Class);
    UpdateEmptyBaseSubobjects(Base, BaseOffset, PlacingEmptyBase);
  }

  if (Info->PrimaryVirtualBaseInfo) {
    BaseSubobjectInfo *PrimaryVirtualBaseInfo = Info->PrimaryVirtualBaseInfo;

    if (Info == PrimaryVirtualBaseInfo->Derived)
      UpdateEmptyBaseSubobjects(PrimaryVirtualBaseInfo, Offset,
                                PlacingEmptyBase);
  }

  // Traverse all member variables.
  unsigned FieldNo = 0;
  for (CXXRecordDecl::field_iterator I = Info->Class->field_begin(),
       E = Info->Class->field_end(); I != E; ++I, ++FieldNo) {
    if (I->isBitField())
      continue;

    CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo);
    UpdateEmptyFieldSubobjects(*I, FieldOffset, PlacingEmptyBase);
  }
}

bool EmptySubobjectMap::CanPlaceBaseAtOffset(const BaseSubobjectInfo *Info,
                                             CharUnits Offset) {
  // If we know this class doesn't have any empty subobjects we don't need to
  // bother checking.
  if (SizeOfLargestEmptySubobject.isZero())
    return true;

  if (!CanPlaceBaseSubobjectAtOffset(Info, Offset))
    return false;

  // We are able to place the base at this offset. Make sure to update the
  // empty base subobject map.
  UpdateEmptyBaseSubobjects(Info, Offset, Info->Class->isEmpty());
  return true;
}

bool
EmptySubobjectMap::CanPlaceFieldSubobjectAtOffset(const CXXRecordDecl *RD,
                                                  const CXXRecordDecl *Class,
                                                  CharUnits Offset) const {
  // We don't have to keep looking past the maximum offset that's known to
  // contain an empty class.
  if (!AnyEmptySubobjectsBeyondOffset(Offset))
    return true;

  if (!CanPlaceSubobjectAtOffset(RD, Offset))
    return false;

  const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);

  // Traverse all non-virtual bases.
  for (const CXXBaseSpecifier &Base : RD->bases()) {
    if (Base.isVirtual())
      continue;

    const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();

    CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(BaseDecl);
    if (!CanPlaceFieldSubobjectAtOffset(BaseDecl, Class, BaseOffset))
      return false;
  }

  if (RD == Class) {
    // This is the most derived class, traverse virtual bases as well.
    for (const CXXBaseSpecifier &Base : RD->vbases()) {
      const CXXRecordDecl *VBaseDecl = Base.getType()->getAsCXXRecordDecl();

      CharUnits VBaseOffset = Offset + Layout.getVBaseClassOffset(VBaseDecl);
      if (!CanPlaceFieldSubobjectAtOffset(VBaseDecl, Class, VBaseOffset))
        return false;
    }
  }

  // Traverse all member variables.
  unsigned FieldNo = 0;
  for (CXXRecordDecl::field_iterator I = RD->field_begin(),
       E = RD->field_end(); I != E; ++I, ++FieldNo) {
    if (I->isBitField())
      continue;

    CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo);

    if (!CanPlaceFieldSubobjectAtOffset(*I, FieldOffset))
      return false;
  }

  return true;
}

bool
EmptySubobjectMap::CanPlaceFieldSubobjectAtOffset(const FieldDecl *FD,
                                                  CharUnits Offset) const {
  // We don't have to keep looking past the maximum offset that's known to
  // contain an empty class.
  if (!AnyEmptySubobjectsBeyondOffset(Offset))
    return true;

  QualType T = FD->getType();
  if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl())
    return CanPlaceFieldSubobjectAtOffset(RD, RD, Offset);

  // If we have an array type we need to look at every element.
  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T)) {
    QualType ElemTy = Context.getBaseElementType(AT);
    const RecordType *RT = ElemTy->getAs<RecordType>();
    if (!RT)
      return true;

    const CXXRecordDecl *RD = RT->getAsCXXRecordDecl();
    const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);

    uint64_t NumElements = Context.getConstantArrayElementCount(AT);
    CharUnits ElementOffset = Offset;
    for (uint64_t I = 0; I != NumElements; ++I) {
      // We don't have to keep looking past the maximum offset that's known to
      // contain an empty class.
      if (!AnyEmptySubobjectsBeyondOffset(ElementOffset))
        return true;

      if (!CanPlaceFieldSubobjectAtOffset(RD, RD, ElementOffset))
        return false;

      ElementOffset += Layout.getSize();
    }
  }

  return true;
}

bool EmptySubobjectMap::CanPlaceFieldAtOffset(const FieldDecl *FD,
                                              CharUnits Offset) {
  if (!CanPlaceFieldSubobjectAtOffset(FD, Offset))
    return false;

  // We are able to place the member variable at this offset.
  // Make sure to update the empty field subobject map.
  UpdateEmptyFieldSubobjects(FD, Offset, FD->hasAttr<NoUniqueAddressAttr>());
  return true;
}

void EmptySubobjectMap::UpdateEmptyFieldSubobjects(
    const CXXRecordDecl *RD, const CXXRecordDecl *Class, CharUnits Offset,
    bool PlacingOverlappingField) {
  // We know that the only empty subobjects that can conflict with empty
  // field subobjects are subobjects of empty bases and potentially-overlapping
  // fields that can be placed at offset zero. Because of this, we only need to
  // keep track of empty field subobjects with offsets less than the size of
  // the largest empty subobject for our class.
  //
  // (Proof: we will only consider placing a subobject at offset zero or at
  // >= the current dsize. The only cases where the earlier subobject can be
  // placed beyond the end of dsize is if it's an empty base or a
  // potentially-overlapping field.)
  if (!PlacingOverlappingField && Offset >= SizeOfLargestEmptySubobject)
    return;

  AddSubobjectAtOffset(RD, Offset);

  const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);

  // Traverse all non-virtual bases.
  for (const CXXBaseSpecifier &Base : RD->bases()) {
    if (Base.isVirtual())
      continue;

    const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();

    CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(BaseDecl);
    UpdateEmptyFieldSubobjects(BaseDecl, Class, BaseOffset,
                               PlacingOverlappingField);
  }

  if (RD == Class) {
    // This is the most derived class, traverse virtual bases as well.
    for (const CXXBaseSpecifier &Base : RD->vbases()) {
      const CXXRecordDecl *VBaseDecl = Base.getType()->getAsCXXRecordDecl();

      CharUnits VBaseOffset = Offset + Layout.getVBaseClassOffset(VBaseDecl);
      UpdateEmptyFieldSubobjects(VBaseDecl, Class, VBaseOffset,
                                 PlacingOverlappingField);
    }
  }

  // Traverse all member variables.
  unsigned FieldNo = 0;
  for (CXXRecordDecl::field_iterator I = RD->field_begin(),
       E = RD->field_end(); I != E; ++I, ++FieldNo) {
    if (I->isBitField())
      continue;

    CharUnits FieldOffset = Offset + getFieldOffset(Layout, FieldNo);

    UpdateEmptyFieldSubobjects(*I, FieldOffset, PlacingOverlappingField);
  }
}
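// A standalone sketch of the overlapping-field case that drives
// PlacingOverlappingField (C++20 semantics):
//
//   struct Empty {};
//   struct S { [[no_unique_address]] Empty E; int X; };
//
// E may share X's address, so it can land beyond what would otherwise be the
// record's data size; such fields must be tracked regardless of
// SizeOfLargestEmptySubobject, which is why the early return above is skipped
// for them.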
void EmptySubobjectMap::UpdateEmptyFieldSubobjects(
    const FieldDecl *FD, CharUnits Offset, bool PlacingOverlappingField) {
  QualType T = FD->getType();
  if (const CXXRecordDecl *RD = T->getAsCXXRecordDecl()) {
    UpdateEmptyFieldSubobjects(RD, RD, Offset, PlacingOverlappingField);
    return;
  }

  // If we have an array type we need to update every element.
  if (const ConstantArrayType *AT = Context.getAsConstantArrayType(T)) {
    QualType ElemTy = Context.getBaseElementType(AT);
    const RecordType *RT = ElemTy->getAs<RecordType>();
    if (!RT)
      return;

    const CXXRecordDecl *RD = RT->getAsCXXRecordDecl();
    const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);

    uint64_t NumElements = Context.getConstantArrayElementCount(AT);
    CharUnits ElementOffset = Offset;

    for (uint64_t I = 0; I != NumElements; ++I) {
      // We know that the only empty subobjects that can conflict with empty
      // field subobjects are subobjects of empty bases that can be placed at
      // offset zero. Because of this, we only need to keep track of empty
      // field subobjects with offsets less than the size of the largest empty
      // subobject for our class.
      if (!PlacingOverlappingField &&
          ElementOffset >= SizeOfLargestEmptySubobject)
        return;

      UpdateEmptyFieldSubobjects(RD, RD, ElementOffset,
                                 PlacingOverlappingField);
      ElementOffset += Layout.getSize();
    }
  }
}

typedef llvm::SmallPtrSet<const CXXRecordDecl *, 4> ClassSetTy;

class ItaniumRecordLayoutBuilder {
protected:
  // FIXME: Remove this and make the appropriate fields public.
  friend class clang::ASTContext;

  const ASTContext &Context;

  EmptySubobjectMap *EmptySubobjects;

  /// Size - The current size of the record layout.
  uint64_t Size;

  /// Alignment - The current alignment of the record layout.
  CharUnits Alignment;

  /// PreferredAlignment - The preferred alignment of the record layout.
  CharUnits PreferredAlignment;

  /// The alignment if attribute packed is not used.
  CharUnits UnpackedAlignment;

  /// \brief The maximum of the alignments of top-level members.
  CharUnits UnadjustedAlignment;

  SmallVector<uint64_t, 16> FieldOffsets;

  /// Whether the external AST source has provided a layout for this
  /// record.
  unsigned UseExternalLayout : 1;

  /// Whether we need to infer alignment, even when we have an
  /// externally-provided layout.
  unsigned InferAlignment : 1;

  /// Packed - Whether the record is packed or not.
  unsigned Packed : 1;

  unsigned IsUnion : 1;

  unsigned IsMac68kAlign : 1;

  unsigned IsNaturalAlign : 1;

  unsigned IsMsStruct : 1;

  /// UnfilledBitsInLastUnit - If the last field laid out was a bitfield,
  /// this contains the number of bits in the last unit that can be used for
  /// an adjacent bitfield if necessary. The unit in question is usually
  /// a byte, but larger units are used if IsMsStruct.
  unsigned char UnfilledBitsInLastUnit;

  /// LastBitfieldStorageUnitSize - If IsMsStruct, represents the size of the
  /// storage unit of the previous field if it was a bitfield.
  unsigned char LastBitfieldStorageUnitSize;

  /// MaxFieldAlignment - The maximum allowed field alignment. This is set by
  /// #pragma pack.
  CharUnits MaxFieldAlignment;

  /// DataSize - The data size of the record being laid out.
  uint64_t DataSize;

  CharUnits NonVirtualSize;
  CharUnits NonVirtualAlignment;
  CharUnits PreferredNVAlignment;

  /// If we've laid out a field but not included its tail padding in Size yet,
  /// this is the size up to the end of that field.
  CharUnits PaddedFieldSize;

  /// PrimaryBase - the primary base class (if one exists) of the class
  /// we're laying out.
  const CXXRecordDecl *PrimaryBase;

  /// PrimaryBaseIsVirtual - Whether the primary base of the class we're laying
  /// out is virtual.
  bool PrimaryBaseIsVirtual;

  /// HasOwnVFPtr - Whether the class provides its own vtable/vftbl
  /// pointer, as opposed to inheriting one from a primary base class.
  bool HasOwnVFPtr;

  /// the flag of field offset changing due to packed attribute.
  bool HasPackedField;

  /// HandledFirstNonOverlappingEmptyField - An auxiliary field used for AIX.
  /// When there are OverlappingEmptyFields existing in the aggregate, the
  /// flag shows if the following first non-empty or empty-but-non-overlapping
  /// field has been handled, if any.
  bool HandledFirstNonOverlappingEmptyField;

  typedef llvm::DenseMap<const CXXRecordDecl *, CharUnits> BaseOffsetsMapTy;

  /// Bases - base classes and their offsets in the record.
  BaseOffsetsMapTy Bases;

  // VBases - virtual base classes and their offsets in the record.
  ASTRecordLayout::VBaseOffsetsMapTy VBases;

  /// IndirectPrimaryBases - Virtual base classes, direct or indirect, that are
  /// primary base classes for some other direct or indirect base class.
  CXXIndirectPrimaryBaseSet IndirectPrimaryBases;

  /// FirstNearlyEmptyVBase - The first nearly empty virtual base class in
  /// inheritance graph order. Used for determining the primary base class.
  const CXXRecordDecl *FirstNearlyEmptyVBase;

  /// VisitedVirtualBases - A set of all the visited virtual bases, used to
  /// avoid visiting virtual bases more than once.
  llvm::SmallPtrSet<const CXXRecordDecl *, 4> VisitedVirtualBases;

  /// Valid if UseExternalLayout is true.
  ExternalLayout External;

  ItaniumRecordLayoutBuilder(const ASTContext &Context,
                             EmptySubobjectMap *EmptySubobjects)
      : Context(Context), EmptySubobjects(EmptySubobjects), Size(0),
        Alignment(CharUnits::One()), PreferredAlignment(CharUnits::One()),
        UnpackedAlignment(CharUnits::One()),
        UnadjustedAlignment(CharUnits::One()), UseExternalLayout(false),
        InferAlignment(false), Packed(false), IsUnion(false),
        IsMac68kAlign(false),
        IsNaturalAlign(!Context.getTargetInfo().getTriple().isOSAIX()),
        IsMsStruct(false), UnfilledBitsInLastUnit(0),
        LastBitfieldStorageUnitSize(0), MaxFieldAlignment(CharUnits::Zero()),
        DataSize(0), NonVirtualSize(CharUnits::Zero()),
        NonVirtualAlignment(CharUnits::One()),
        PreferredNVAlignment(CharUnits::One()),
        PaddedFieldSize(CharUnits::Zero()), PrimaryBase(nullptr),
        PrimaryBaseIsVirtual(false), HasOwnVFPtr(false), HasPackedField(false),
        HandledFirstNonOverlappingEmptyField(false),
        FirstNearlyEmptyVBase(nullptr) {}

  void Layout(const RecordDecl *D);
  void Layout(const CXXRecordDecl *D);
  void Layout(const ObjCInterfaceDecl *D);

  void LayoutFields(const RecordDecl *D);
  void LayoutField(const FieldDecl *D, bool InsertExtraPadding);
  void LayoutWideBitField(uint64_t FieldSize, uint64_t StorageUnitSize,
                          bool FieldPacked, const FieldDecl *D);
  void LayoutBitField(const FieldDecl *D);

  TargetCXXABI getCXXABI() const {
    return Context.getTargetInfo().getCXXABI();
  }

  /// BaseSubobjectInfoAllocator - Allocator for BaseSubobjectInfo objects.
  llvm::SpecificBumpPtrAllocator<BaseSubobjectInfo> BaseSubobjectInfoAllocator;

  typedef llvm::DenseMap<const CXXRecordDecl *, BaseSubobjectInfo *>
      BaseSubobjectInfoMapTy;

  /// VirtualBaseInfo - Map from all the (direct or indirect) virtual bases
  /// of the class we're laying out to their base subobject info.
  BaseSubobjectInfoMapTy VirtualBaseInfo;

  /// NonVirtualBaseInfo - Map from all the direct non-virtual bases of the
  /// class we're laying out to their base subobject info.
  BaseSubobjectInfoMapTy NonVirtualBaseInfo;

  /// ComputeBaseSubobjectInfo - Compute the base subobject information for the
  /// bases of the given class.
  void ComputeBaseSubobjectInfo(const CXXRecordDecl *RD);

  /// ComputeBaseSubobjectInfo - Compute the base subobject information for a
  /// single class and all of its base classes.
  BaseSubobjectInfo *ComputeBaseSubobjectInfo(const CXXRecordDecl *RD,
                                              bool IsVirtual,
                                              BaseSubobjectInfo *Derived);

  /// DeterminePrimaryBase - Determine the primary base of the given class.
  void DeterminePrimaryBase(const CXXRecordDecl *RD);

  void SelectPrimaryVBase(const CXXRecordDecl *RD);
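  // Intuition for primary bases, as a standalone example (Itanium ABI):
  //   struct A { virtual void f(); };
  //   struct B : A { virtual void g(); };
  // A is chosen as B's primary base: B reuses the vptr of the A subobject at
  // offset 0 rather than adding a second one, so B typically stays one
  // pointer in size.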
  void EnsureVTablePointerAlignment(CharUnits UnpackedBaseAlign);

  /// LayoutNonVirtualBases - Determines the primary base class (if any) and
  /// lays it out. Will then proceed to lay out all non-virtual base classes.
  void LayoutNonVirtualBases(const CXXRecordDecl *RD);

  /// LayoutNonVirtualBase - Lays out a single non-virtual base.
  void LayoutNonVirtualBase(const BaseSubobjectInfo *Base);

  void AddPrimaryVirtualBaseOffsets(const BaseSubobjectInfo *Info,
                                    CharUnits Offset);

  /// LayoutVirtualBases - Lays out all the virtual bases.
  void LayoutVirtualBases(const CXXRecordDecl *RD,
                          const CXXRecordDecl *MostDerivedClass);

  /// LayoutVirtualBase - Lays out a single virtual base.
  void LayoutVirtualBase(const BaseSubobjectInfo *Base);

  /// LayoutBase - Will lay out a base and return the offset where it was
  /// placed, in chars.
  CharUnits LayoutBase(const BaseSubobjectInfo *Base);

  /// InitializeLayout - Initialize record layout for the given record decl.
  void InitializeLayout(const Decl *D);

  /// FinishLayout - Finalize record layout. Adjust record size based on the
  /// alignment.
  void FinishLayout(const NamedDecl *D);

  void UpdateAlignment(CharUnits NewAlignment, CharUnits UnpackedNewAlignment,
                       CharUnits PreferredAlignment);
  void UpdateAlignment(CharUnits NewAlignment, CharUnits UnpackedNewAlignment) {
    UpdateAlignment(NewAlignment, UnpackedNewAlignment, NewAlignment);
  }
  void UpdateAlignment(CharUnits NewAlignment) {
    UpdateAlignment(NewAlignment, NewAlignment, NewAlignment);
  }

  /// Retrieve the externally-supplied field offset for the given
  /// field.
  ///
  /// \param Field The field whose offset is being queried.
  /// \param ComputedOffset The offset that we've computed for this field.
  uint64_t updateExternalFieldOffset(const FieldDecl *Field,
                                     uint64_t ComputedOffset);

  void CheckFieldPadding(uint64_t Offset, uint64_t UnpaddedOffset,
                         uint64_t UnpackedOffset, unsigned UnpackedAlign,
                         bool isPacked, const FieldDecl *D);

  DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID);

  CharUnits getSize() const {
    assert(Size % Context.getCharWidth() == 0);
    return Context.toCharUnitsFromBits(Size);
  }
  uint64_t getSizeInBits() const { return Size; }

  void setSize(CharUnits NewSize) { Size = Context.toBits(NewSize); }
  void setSize(uint64_t NewSize) { Size = NewSize; }

  CharUnits getAligment() const { return Alignment; }

  CharUnits getDataSize() const {
    assert(DataSize % Context.getCharWidth() == 0);
    return Context.toCharUnitsFromBits(DataSize);
  }
  uint64_t getDataSizeInBits() const { return DataSize; }

  void setDataSize(CharUnits NewSize) { DataSize = Context.toBits(NewSize); }
  void setDataSize(uint64_t NewSize) { DataSize = NewSize; }

  ItaniumRecordLayoutBuilder(const ItaniumRecordLayoutBuilder &) = delete;
  void operator=(const ItaniumRecordLayoutBuilder &) = delete;
};
} // end anonymous namespace

void ItaniumRecordLayoutBuilder::SelectPrimaryVBase(const CXXRecordDecl *RD) {
  for (const auto &I : RD->bases()) {
    assert(!I.getType()->isDependentType() &&
           "Cannot layout class with dependent bases.");

    const CXXRecordDecl *Base = I.getType()->getAsCXXRecordDecl();

    // Check if this is a nearly empty virtual base.
    if (I.isVirtual() && Context.isNearlyEmpty(Base)) {
      // If it's not an indirect primary base, then we've found our primary
      // base.
      if (!IndirectPrimaryBases.count(Base)) {
        PrimaryBase = Base;
        PrimaryBaseIsVirtual = true;
        return;
      }

      // Is this the first nearly empty virtual base?
      if (!FirstNearlyEmptyVBase)
        FirstNearlyEmptyVBase = Base;
    }

    SelectPrimaryVBase(Base);
    if (PrimaryBase)
      return;
  }
}

/// DeterminePrimaryBase - Determine the primary base of the given class.
void ItaniumRecordLayoutBuilder::DeterminePrimaryBase(const CXXRecordDecl *RD) {
  // If the class isn't dynamic, it won't have a primary base.
  if (!RD->isDynamicClass())
    return;

  // Compute all the primary virtual bases for all of our direct and
  // indirect bases, and record all their primary virtual base classes.
  RD->getIndirectPrimaryBases(IndirectPrimaryBases);

  // If the record has a dynamic base class, attempt to choose a primary base
  // class. It is the first (in direct base class order) non-virtual dynamic
  // base class, if one exists.
  for (const auto &I : RD->bases()) {
    // Ignore virtual bases.
    if (I.isVirtual())
      continue;

    const CXXRecordDecl *Base = I.getType()->getAsCXXRecordDecl();

    if (Base->isDynamicClass()) {
      // We found it.
      PrimaryBase = Base;
      PrimaryBaseIsVirtual = false;
      return;
    }
  }

  // Under the Itanium ABI, if there is no non-virtual primary base class,
  // try to compute the primary virtual base. The primary virtual base is
  // the first nearly empty virtual base that is not an indirect primary
  // virtual base class, if one exists.
  if (RD->getNumVBases() != 0) {
    SelectPrimaryVBase(RD);
    if (PrimaryBase)
      return;
  }

  // Otherwise, it is the first indirect primary base class, if one exists.
  if (FirstNearlyEmptyVBase) {
    PrimaryBase = FirstNearlyEmptyVBase;
    PrimaryBaseIsVirtual = true;
    return;
  }

  assert(!PrimaryBase && "Should not get here with a primary base!");
}
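// A nearly empty virtual base can end up primary, e.g. (Itanium ABI):
//
//   struct V { virtual void f(); };
//   struct A : virtual V { };
//
// V consists of just a vptr, so SelectPrimaryVBase picks it and A starts with
// V's vptr at offset 0, typically making sizeof(A) one pointer.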
BaseSubobjectInfo *ItaniumRecordLayoutBuilder::ComputeBaseSubobjectInfo(
    const CXXRecordDecl *RD, bool IsVirtual, BaseSubobjectInfo *Derived) {
  BaseSubobjectInfo *Info;

  if (IsVirtual) {
    // Check if we already have info about this virtual base.
    BaseSubobjectInfo *&InfoSlot = VirtualBaseInfo[RD];
    if (InfoSlot) {
      assert(InfoSlot->Class == RD && "Wrong class for virtual base info!");
      return InfoSlot;
    }

    // We don't, create it.
    InfoSlot = new (BaseSubobjectInfoAllocator.Allocate()) BaseSubobjectInfo;
    Info = InfoSlot;
  } else {
    Info = new (BaseSubobjectInfoAllocator.Allocate()) BaseSubobjectInfo;
  }

  Info->Class = RD;
  Info->IsVirtual = IsVirtual;
  Info->Derived = nullptr;
  Info->PrimaryVirtualBaseInfo = nullptr;

  const CXXRecordDecl *PrimaryVirtualBase = nullptr;
  BaseSubobjectInfo *PrimaryVirtualBaseInfo = nullptr;

  // Check if this base has a primary virtual base.
  if (RD->getNumVBases()) {
    const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
    if (Layout.isPrimaryBaseVirtual()) {
      // This base does have a primary virtual base.
      PrimaryVirtualBase = Layout.getPrimaryBase();
      assert(PrimaryVirtualBase && "Didn't have a primary virtual base!");

      // Now check if we have base subobject info about this primary base.
      PrimaryVirtualBaseInfo = VirtualBaseInfo.lookup(PrimaryVirtualBase);

      if (PrimaryVirtualBaseInfo) {
        if (PrimaryVirtualBaseInfo->Derived) {
          // We did have info about this primary base, and it turns out that it
          // has already been claimed as a primary virtual base for another
          // base.
          PrimaryVirtualBase = nullptr;
        } else {
          // We can claim this base as our primary base.
          Info->PrimaryVirtualBaseInfo = PrimaryVirtualBaseInfo;
          PrimaryVirtualBaseInfo->Derived = Info;
        }
      }
    }
  }

  // Now go through all direct bases.
  for (const auto &I : RD->bases()) {
    bool IsVirtual = I.isVirtual();

    const CXXRecordDecl *BaseDecl = I.getType()->getAsCXXRecordDecl();

    Info->Bases.push_back(ComputeBaseSubobjectInfo(BaseDecl, IsVirtual, Info));
  }

  if (PrimaryVirtualBase && !PrimaryVirtualBaseInfo) {
    // Traversing the bases must have created the base info for our primary
    // virtual base.
    PrimaryVirtualBaseInfo = VirtualBaseInfo.lookup(PrimaryVirtualBase);
    assert(PrimaryVirtualBaseInfo && "Did not create a primary virtual base!");

    // Claim the primary virtual base as our primary virtual base.
    Info->PrimaryVirtualBaseInfo = PrimaryVirtualBaseInfo;
    PrimaryVirtualBaseInfo->Derived = Info;
  }

  return Info;
}

void ItaniumRecordLayoutBuilder::ComputeBaseSubobjectInfo(
    const CXXRecordDecl *RD) {
  for (const auto &I : RD->bases()) {
    bool IsVirtual = I.isVirtual();

    const CXXRecordDecl *BaseDecl = I.getType()->getAsCXXRecordDecl();

    // Compute the base subobject info for this base.
    BaseSubobjectInfo *Info =
        ComputeBaseSubobjectInfo(BaseDecl, IsVirtual, nullptr);

    if (IsVirtual) {
      // ComputeBaseInfo has already added this base for us.
      assert(VirtualBaseInfo.count(BaseDecl) && "Did not add virtual base!");
    } else {
      // Add the base info to the map of non-virtual bases.
      assert(!NonVirtualBaseInfo.count(BaseDecl) &&
             "Non-virtual base already exists!");
      NonVirtualBaseInfo.insert(std::make_pair(BaseDecl, Info));
    }
  }
}

void ItaniumRecordLayoutBuilder::EnsureVTablePointerAlignment(
    CharUnits UnpackedBaseAlign) {
  CharUnits BaseAlign = Packed ? CharUnits::One() : UnpackedBaseAlign;

  // The maximum field alignment overrides base align.
  if (!MaxFieldAlignment.isZero()) {
    BaseAlign = std::min(BaseAlign, MaxFieldAlignment);
    UnpackedBaseAlign = std::min(UnpackedBaseAlign, MaxFieldAlignment);
  }

  // Round up the current record size to pointer alignment.
  setSize(getSize().alignTo(BaseAlign));

  // Update the alignment.
  UpdateAlignment(BaseAlign, UnpackedBaseAlign, BaseAlign);
}
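// The branch below that injects a vptr covers dynamic classes with no primary
// base at all, e.g.:
//
//   struct S { virtual void f(); int X; };
//
// S gets its own vtable pointer at offset 0, so on an LP64 target layout
// proceeds with Size == DataSize == 8 before X is placed.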
    // Lay out the base.
    BaseSubobjectInfo *BaseInfo = NonVirtualBaseInfo.lookup(BaseDecl);
    assert(BaseInfo && "Did not find base info for non-virtual base!");

    LayoutNonVirtualBase(BaseInfo);
  }
}

void ItaniumRecordLayoutBuilder::LayoutNonVirtualBase(
    const BaseSubobjectInfo *Base) {
  // Layout the base.
  CharUnits Offset = LayoutBase(Base);

  // Add its base class offset.
  assert(!Bases.count(Base->Class) && "base offset already exists!");
  Bases.insert(std::make_pair(Base->Class, Offset));

  AddPrimaryVirtualBaseOffsets(Base, Offset);
}

void ItaniumRecordLayoutBuilder::AddPrimaryVirtualBaseOffsets(
    const BaseSubobjectInfo *Info, CharUnits Offset) {
  // This base isn't interesting, it has no virtual bases.
  if (!Info->Class->getNumVBases())
    return;

  // First, check if we have a virtual primary base to add offsets for.
  if (Info->PrimaryVirtualBaseInfo) {
    assert(Info->PrimaryVirtualBaseInfo->IsVirtual &&
           "Primary virtual base is not virtual!");
    if (Info->PrimaryVirtualBaseInfo->Derived == Info) {
      // Add the offset.
      assert(!VBases.count(Info->PrimaryVirtualBaseInfo->Class) &&
             "primary vbase offset already exists!");
      VBases.insert(std::make_pair(Info->PrimaryVirtualBaseInfo->Class,
                                   ASTRecordLayout::VBaseInfo(Offset, false)));

      // Traverse the primary virtual base.
      AddPrimaryVirtualBaseOffsets(Info->PrimaryVirtualBaseInfo, Offset);
    }
  }

  // Now go through all direct non-virtual bases.
  const ASTRecordLayout &Layout = Context.getASTRecordLayout(Info->Class);
  for (const BaseSubobjectInfo *Base : Info->Bases) {
    if (Base->IsVirtual)
      continue;

    CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(Base->Class);
    AddPrimaryVirtualBaseOffsets(Base, BaseOffset);
  }
}

void ItaniumRecordLayoutBuilder::LayoutVirtualBases(
    const CXXRecordDecl *RD, const CXXRecordDecl *MostDerivedClass) {
  const CXXRecordDecl *PrimaryBase;
  bool PrimaryBaseIsVirtual;

  if (MostDerivedClass == RD) {
    PrimaryBase = this->PrimaryBase;
    PrimaryBaseIsVirtual = this->PrimaryBaseIsVirtual;
  } else {
    const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
    PrimaryBase = Layout.getPrimaryBase();
    PrimaryBaseIsVirtual = Layout.isPrimaryBaseVirtual();
  }

  for (const CXXBaseSpecifier &Base : RD->bases()) {
    assert(!Base.getType()->isDependentType() &&
           "Cannot layout class with dependent bases.");

    const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();

    if (Base.isVirtual()) {
      if (PrimaryBase != BaseDecl || !PrimaryBaseIsVirtual) {
        bool IndirectPrimaryBase = IndirectPrimaryBases.count(BaseDecl);

        // Only lay out the virtual base if it's not an indirect primary base.
        if (!IndirectPrimaryBase) {
          // Only visit virtual bases once.
          if (!VisitedVirtualBases.insert(BaseDecl).second)
            continue;

          const BaseSubobjectInfo *BaseInfo = VirtualBaseInfo.lookup(BaseDecl);
          assert(BaseInfo && "Did not find virtual base info!");
          LayoutVirtualBase(BaseInfo);
        }
      }
    }

    if (!BaseDecl->getNumVBases()) {
      // This base isn't interesting since it doesn't have any virtual bases.
      continue;
    }

    LayoutVirtualBases(BaseDecl, MostDerivedClass);
  }
}
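// For example, in the diamond
//   struct V { int v; };
//   struct A : virtual V {};
//   struct B : virtual V {};
//   struct D : A, B {};
// V is reachable through both A and B, but the VisitedVirtualBases check
// above ensures the single shared V subobject is laid out only once.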
void ItaniumRecordLayoutBuilder::LayoutVirtualBase(
    const BaseSubobjectInfo *Base) {
  assert(!Base->Derived && "Trying to lay out a primary virtual base!");

  // Layout the base.
  CharUnits Offset = LayoutBase(Base);

  // Add its base class offset.
  assert(!VBases.count(Base->Class) && "vbase offset already exists!");
  VBases.insert(std::make_pair(Base->Class,
                               ASTRecordLayout::VBaseInfo(Offset, false)));

  AddPrimaryVirtualBaseOffsets(Base, Offset);
}

CharUnits
ItaniumRecordLayoutBuilder::LayoutBase(const BaseSubobjectInfo *Base) {
  assert(!IsUnion && "Unions cannot have base classes.");

  const ASTRecordLayout &Layout = Context.getASTRecordLayout(Base->Class);
  CharUnits Offset;

  // Query the external layout to see if it provides an offset.
  bool HasExternalLayout = false;
  if (UseExternalLayout) {
    if (Base->IsVirtual)
      HasExternalLayout = External.getExternalVBaseOffset(Base->Class, Offset);
    else
      HasExternalLayout = External.getExternalNVBaseOffset(Base->Class, Offset);
  }

  auto getBaseOrPreferredBaseAlignFromUnpacked = [&](CharUnits UnpackedAlign) {
    // Clang <= 6 incorrectly applied the 'packed' attribute to base classes.
    // Per GCC's documentation, it only applies to non-static data members.
    return (Packed && ((Context.getLangOpts().getClangABICompat() <=
                        LangOptions::ClangABI::Ver6) ||
                       Context.getTargetInfo().getTriple().isPS4() ||
                       Context.getTargetInfo().getTriple().isOSAIX()))
               ? CharUnits::One()
               : UnpackedAlign;
  };

  CharUnits UnpackedBaseAlign = Layout.getNonVirtualAlignment();
  CharUnits UnpackedPreferredBaseAlign = Layout.getPreferredNVAlignment();
  CharUnits BaseAlign =
      getBaseOrPreferredBaseAlignFromUnpacked(UnpackedBaseAlign);
  CharUnits PreferredBaseAlign =
      getBaseOrPreferredBaseAlignFromUnpacked(UnpackedPreferredBaseAlign);

  const bool DefaultsToAIXPowerAlignment =
      Context.getTargetInfo().defaultsToAIXPowerAlignment();
  if (DefaultsToAIXPowerAlignment) {
    // AIX `power` alignment does not apply the preferred alignment for
    // non-union classes if the source of the alignment (the current base in
    // this context) follows introduction of the first subobject with
    // exclusively allocated space or zero-extent array.
    if (!Base->Class->isEmpty() && !HandledFirstNonOverlappingEmptyField) {
      // By handling a base class that is not empty, we're handling the
      // "first (inherited) member".
      HandledFirstNonOverlappingEmptyField = true;
    } else if (!IsNaturalAlign) {
      UnpackedPreferredBaseAlign = UnpackedBaseAlign;
      PreferredBaseAlign = BaseAlign;
    }
  }

  CharUnits UnpackedAlignTo = !DefaultsToAIXPowerAlignment
                                  ? UnpackedBaseAlign
                                  : UnpackedPreferredBaseAlign;
  // If we have an empty base class, try to place it at offset 0.
  if (Base->Class->isEmpty() &&
      (!HasExternalLayout || Offset == CharUnits::Zero()) &&
      EmptySubobjects->CanPlaceBaseAtOffset(Base, CharUnits::Zero())) {
    setSize(std::max(getSize(), Layout.getSize()));
    UpdateAlignment(BaseAlign, UnpackedAlignTo, PreferredBaseAlign);

    return CharUnits::Zero();
  }
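  // For example, given
  //   struct Empty {};
  //   struct D : Empty { int x; };
  // Empty is placed at offset 0 overlapping the storage of 'x', so
  // sizeof(D) == sizeof(int) rather than sizeof(int) + 1.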
  // The maximum field alignment overrides the base align/(AIX-only) preferred
  // base align.
  if (!MaxFieldAlignment.isZero()) {
    BaseAlign = std::min(BaseAlign, MaxFieldAlignment);
    PreferredBaseAlign = std::min(PreferredBaseAlign, MaxFieldAlignment);
    UnpackedAlignTo = std::min(UnpackedAlignTo, MaxFieldAlignment);
  }

  CharUnits AlignTo =
      !DefaultsToAIXPowerAlignment ? BaseAlign : PreferredBaseAlign;
  if (!HasExternalLayout) {
    // Round up the current record size to the base's alignment boundary.
    Offset = getDataSize().alignTo(AlignTo);

    // Try to place the base.
    while (!EmptySubobjects->CanPlaceBaseAtOffset(Base, Offset))
      Offset += AlignTo;
  } else {
    bool Allowed = EmptySubobjects->CanPlaceBaseAtOffset(Base, Offset);
    (void)Allowed;
    assert(Allowed && "Base subobject externally placed at overlapping offset");

    if (InferAlignment && Offset < getDataSize().alignTo(AlignTo)) {
      // The externally-supplied base offset is before the base offset we
      // computed. Assume that the structure is packed.
      Alignment = CharUnits::One();
      InferAlignment = false;
    }
  }

  if (!Base->Class->isEmpty()) {
    // Update the data size.
    setDataSize(Offset + Layout.getNonVirtualSize());

    setSize(std::max(getSize(), getDataSize()));
  } else
    setSize(std::max(getSize(), Offset + Layout.getSize()));

  // Remember max struct/class alignment.
  UpdateAlignment(BaseAlign, UnpackedAlignTo, PreferredBaseAlign);

  return Offset;
}

void ItaniumRecordLayoutBuilder::InitializeLayout(const Decl *D) {
  if (const RecordDecl *RD = dyn_cast<RecordDecl>(D)) {
    IsUnion = RD->isUnion();
    IsMsStruct = RD->isMsStruct(Context);
  }

  Packed = D->hasAttr<PackedAttr>();

  // Honor the default struct packing maximum alignment flag.
  if (unsigned DefaultMaxFieldAlignment = Context.getLangOpts().PackStruct) {
    MaxFieldAlignment = CharUnits::fromQuantity(DefaultMaxFieldAlignment);
  }

  // mac68k alignment supersedes maximum field alignment and attribute aligned,
  // and forces all structures to have 2-byte alignment. The IBM docs on it
  // allude to additional (more complicated) semantics, especially with regard
  // to bit-fields, but gcc appears not to follow that.
  if (D->hasAttr<AlignMac68kAttr>()) {
    assert(
        !D->hasAttr<AlignNaturalAttr>() &&
        "Having both mac68k and natural alignment on a decl is not allowed.");
    IsMac68kAlign = true;
    MaxFieldAlignment = CharUnits::fromQuantity(2);
    Alignment = CharUnits::fromQuantity(2);
    PreferredAlignment = CharUnits::fromQuantity(2);
  } else {
    if (D->hasAttr<AlignNaturalAttr>())
      IsNaturalAlign = true;

    if (const MaxFieldAlignmentAttr *MFAA = D->getAttr<MaxFieldAlignmentAttr>())
      MaxFieldAlignment = Context.toCharUnitsFromBits(MFAA->getAlignment());

    if (unsigned MaxAlign = D->getMaxAlignment())
      UpdateAlignment(Context.toCharUnitsFromBits(MaxAlign));
  }

  HandledFirstNonOverlappingEmptyField =
      !Context.getTargetInfo().defaultsToAIXPowerAlignment() || IsNaturalAlign;

  // If there is an external AST source, ask it for the various offsets.
  if (const RecordDecl *RD = dyn_cast<RecordDecl>(D))
    if (ExternalASTSource *Source = Context.getExternalSource()) {
      UseExternalLayout = Source->layoutRecordType(
          RD, External.Size, External.Align, External.FieldOffsets,
          External.BaseOffsets, External.VirtualBaseOffsets);

      // Update based on external alignment.
      if (UseExternalLayout) {
        if (External.Align > 0) {
          Alignment = Context.toCharUnitsFromBits(External.Align);
          PreferredAlignment = Context.toCharUnitsFromBits(External.Align);
        } else {
          // The external source didn't have alignment information; infer it.
          InferAlignment = true;
        }
      }
    }
}

void ItaniumRecordLayoutBuilder::Layout(const RecordDecl *D) {
  InitializeLayout(D);
  LayoutFields(D);

  // Finally, round the size of the total struct up to the alignment of the
  // struct itself.
  FinishLayout(D);
}
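// For example, '#pragma options align=mac68k' (handled in InitializeLayout
// above) caps field alignment at 2 bytes, so in
//   struct M { char c; int i; };
// 'i' is placed at offset 2 and sizeof(M) == 6 instead of the usual 8.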
void ItaniumRecordLayoutBuilder::Layout(const CXXRecordDecl *RD) {
  InitializeLayout(RD);

  // Lay out the vtable and the non-virtual bases.
  LayoutNonVirtualBases(RD);

  LayoutFields(RD);

  NonVirtualSize = Context.toCharUnitsFromBits(
      llvm::alignTo(getSizeInBits(), Context.getTargetInfo().getCharAlign()));
  NonVirtualAlignment = Alignment;
  PreferredNVAlignment = PreferredAlignment;

  // Lay out the virtual bases and add the primary virtual base offsets.
  LayoutVirtualBases(RD, RD);

  // Finally, round the size of the total struct up to the alignment
  // of the struct itself.
  FinishLayout(RD);

#ifndef NDEBUG
  // Check that we have base offsets for all bases.
  for (const CXXBaseSpecifier &Base : RD->bases()) {
    if (Base.isVirtual())
      continue;

    const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();

    assert(Bases.count(BaseDecl) && "Did not find base offset!");
  }

  // And all virtual bases.
  for (const CXXBaseSpecifier &Base : RD->vbases()) {
    const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();

    assert(VBases.count(BaseDecl) && "Did not find base offset!");
  }
#endif
}

void ItaniumRecordLayoutBuilder::Layout(const ObjCInterfaceDecl *D) {
  if (ObjCInterfaceDecl *SD = D->getSuperClass()) {
    const ASTRecordLayout &SL = Context.getASTObjCInterfaceLayout(SD);

    UpdateAlignment(SL.getAlignment());

    // We start laying out ivars not at the end of the superclass
    // structure, but at the next byte following the last field.
    setDataSize(SL.getDataSize());
    setSize(getDataSize());
  }

  InitializeLayout(D);
  // Layout each ivar sequentially.
  for (const ObjCIvarDecl *IVD = D->all_declared_ivar_begin(); IVD;
       IVD = IVD->getNextIvar())
    LayoutField(IVD, false);

  // Finally, round the size of the total struct up to the alignment of the
  // struct itself.
  FinishLayout(D);
}

void ItaniumRecordLayoutBuilder::LayoutFields(const RecordDecl *D) {
  // Layout each field, for now, just sequentially, respecting alignment. In
  // the future, this will need to be tweakable by targets.
  bool InsertExtraPadding = D->mayInsertExtraPadding(/*EmitRemark=*/true);
  bool HasFlexibleArrayMember = D->hasFlexibleArrayMember();
  for (auto I = D->field_begin(), End = D->field_end(); I != End; ++I) {
    auto Next(I);
    ++Next;
    LayoutField(*I,
                InsertExtraPadding && (Next != End || !HasFlexibleArrayMember));
  }
}

// Rounds the specified size to have it a multiple of the char size.
static uint64_t roundUpSizeToCharAlignment(uint64_t Size,
                                           const ASTContext &Context) {
  uint64_t CharAlignment = Context.getTargetInfo().getCharAlign();
  return llvm::alignTo(Size, CharAlignment);
}

void ItaniumRecordLayoutBuilder::LayoutWideBitField(uint64_t FieldSize,
                                                    uint64_t StorageUnitSize,
                                                    bool FieldPacked,
                                                    const FieldDecl *D) {
  assert(Context.getLangOpts().CPlusPlus &&
         "Can only have wide bit-fields in C++!");

  // Itanium C++ ABI 2.4:
  //   If sizeof(T)*8 < n, let T' be the largest integral POD type with
  //   sizeof(T')*8 <= n.
  QualType IntegralPODTypes[] = {
      Context.UnsignedCharTy, Context.UnsignedShortTy, Context.UnsignedIntTy,
      Context.UnsignedLongTy, Context.UnsignedLongLongTy};

  QualType Type;
  for (const QualType &QT : IntegralPODTypes) {
    uint64_t Size = Context.getTypeSize(QT);

    if (Size > FieldSize)
      break;

    Type = QT;
  }
  assert(!Type.isNull() && "Did not find a type!");
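  // For example, 'struct W { char c; char wide : 16; };' (valid in C++)
  // selects unsigned short as T', so 'wide' is placed at the next
  // 16-bit-aligned offset (bit 16) rather than immediately after 'c' at
  // bit 8.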
  CharUnits TypeAlign = Context.getTypeAlignInChars(Type);

  // We're not going to use any of the unfilled bits in the last byte.
  UnfilledBitsInLastUnit = 0;
  LastBitfieldStorageUnitSize = 0;

  uint64_t FieldOffset;
  uint64_t UnpaddedFieldOffset = getDataSizeInBits() - UnfilledBitsInLastUnit;

  if (IsUnion) {
    uint64_t RoundedFieldSize = roundUpSizeToCharAlignment(FieldSize, Context);
    setDataSize(std::max(getDataSizeInBits(), RoundedFieldSize));
    FieldOffset = 0;
  } else {
    // The bitfield is allocated starting at the next offset aligned
    // appropriately for T', with length n bits.
    FieldOffset = llvm::alignTo(getDataSizeInBits(), Context.toBits(TypeAlign));

    uint64_t NewSizeInBits = FieldOffset + FieldSize;

    setDataSize(
        llvm::alignTo(NewSizeInBits, Context.getTargetInfo().getCharAlign()));
    UnfilledBitsInLastUnit = getDataSizeInBits() - NewSizeInBits;
  }

  // Place this field at the current location.
  FieldOffsets.push_back(FieldOffset);

  CheckFieldPadding(FieldOffset, UnpaddedFieldOffset, FieldOffset,
                    Context.toBits(TypeAlign), FieldPacked, D);

  // Update the size.
  setSize(std::max(getSizeInBits(), getDataSizeInBits()));

  // Remember max struct/class alignment.
  UpdateAlignment(TypeAlign);
}

static bool isAIXLayout(const ASTContext &Context) {
  return Context.getTargetInfo().getTriple().getOS() == llvm::Triple::AIX;
}

void ItaniumRecordLayoutBuilder::LayoutBitField(const FieldDecl *D) {
  bool FieldPacked = Packed || D->hasAttr<PackedAttr>();
  uint64_t FieldSize = D->getBitWidthValue(Context);
  TypeInfo FieldInfo = Context.getTypeInfo(D->getType());
  uint64_t StorageUnitSize = FieldInfo.Width;
  unsigned FieldAlign = FieldInfo.Align;
  bool AlignIsRequired = FieldInfo.isAlignRequired();

  // UnfilledBitsInLastUnit is the difference between the end of the
  // last allocated bitfield (i.e. the first bit offset available for
  // bitfields) and the end of the current data size in bits (i.e. the
  // first bit offset available for non-bitfields). The current data
  // size in bits is always a multiple of the char size; additionally,
  // for ms_struct records it's also a multiple of the
  // LastBitfieldStorageUnitSize (if set).

  // The struct-layout algorithm is dictated by the platform ABI,
  // which in principle could use almost any rules it likes. In
  // practice, UNIXy targets tend to inherit the algorithm described
  // in the System V generic ABI. The basic bitfield layout rule in
  // System V is to place bitfields at the next available bit offset
  // where the entire bitfield would fit in an aligned storage unit of
  // the declared type; it's okay if an earlier or later non-bitfield
  // is allocated in the same storage unit. However, some targets
  // (those that !useBitFieldTypeAlignment(), e.g. ARM APCS) don't
  // require this storage unit to be aligned, and therefore always put
  // the bitfield at the next available bit offset.

  // ms_struct basically requests a complete replacement of the
  // platform ABI's struct-layout algorithm, with the high-level goal
  // of duplicating MSVC's layout. For non-bitfields, this follows
  // the standard algorithm. The basic bitfield layout rule is to
  // allocate an entire unit of the bitfield's declared type
  // (e.g. 'unsigned long'), then parcel it up among successive
  // bitfields whose declared types have the same size, making a new
  // unit as soon as the last can no longer store the whole value.
  // Since it completely replaces the platform ABI's algorithm,
  // settings like !useBitFieldTypeAlignment() do not apply.

  // A zero-width bitfield forces the use of a new storage unit for
  // later bitfields. In general, this occurs by rounding up the
  // current size of the struct as if the algorithm were about to
  // place a non-bitfield of the field's formal type. Usually this
  // does not change the alignment of the struct itself, but it does
  // on some targets (those that useZeroLengthBitfieldAlignment(),
  // e.g. ARM). In ms_struct layout, zero-width bitfields are
  // ignored unless they follow a non-zero-width bitfield.

  // A field alignment restriction (e.g. from #pragma pack) or
  // specification (e.g. from __attribute__((aligned))) changes the
  // formal alignment of the field. For System V, this alters the
  // required alignment of the notional storage unit that must contain
  // the bitfield. For ms_struct, this only affects the placement of
  // new storage units. In both cases, the effect of #pragma pack is
  // ignored on zero-width bitfields.

  // On System V, a packed field (e.g. from #pragma pack or
  // __attribute__((packed))) always uses the next available bit
  // offset.

  // In an ms_struct struct, the alignment of a fundamental type is
  // always equal to its size. This is necessary in order to mimic
  // the i386 alignment rules on targets which might not fully align
  // all types (e.g. Darwin PPC32, where alignof(long long) == 4).
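  // For example, given
  //   struct S { char c; int b : 20; };
  // the System V rule places 'b' at bit offset 8, sharing the 4-byte 'int'
  // storage unit with 'c' (sizeof(S) == 4), while ms_struct allocates a
  // fresh, aligned 'int' unit for 'b' at byte 4 (sizeof(S) == 8).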
  // First, some simple bookkeeping to perform for ms_struct structs.
  if (IsMsStruct) {
    // The field alignment for integer types is always the size.
    FieldAlign = StorageUnitSize;

    // If the previous field was not a bitfield, or was a bitfield
    // with a different storage unit size, or if this field doesn't fit into
    // the current storage unit, we're done with that storage unit.
    if (LastBitfieldStorageUnitSize != StorageUnitSize ||
        UnfilledBitsInLastUnit < FieldSize) {
      // Also, ignore zero-length bitfields after non-bitfields.
      if (!LastBitfieldStorageUnitSize && !FieldSize)
        FieldAlign = 1;

      UnfilledBitsInLastUnit = 0;
      LastBitfieldStorageUnitSize = 0;
    }
  }

  if (isAIXLayout(Context)) {
    if (StorageUnitSize < Context.getTypeSize(Context.UnsignedIntTy)) {
      // On AIX, [bool, char, short] bitfields have the same alignment
      // as [unsigned].
      StorageUnitSize = Context.getTypeSize(Context.UnsignedIntTy);
    } else if (StorageUnitSize > Context.getTypeSize(Context.UnsignedIntTy) &&
               Context.getTargetInfo().getTriple().isArch32Bit() &&
               FieldSize <= 32) {
      // Under 32-bit compile mode, the bitcontainer is 32 bits if a single
      // long long bitfield has length no greater than 32 bits.
      StorageUnitSize = 32;

      if (!AlignIsRequired)
        FieldAlign = 32;
    }

    if (FieldAlign < StorageUnitSize) {
      // The bitfield alignment should always be greater than or equal to
      // the bitcontainer size.
      FieldAlign = StorageUnitSize;
    }
  }

  // If the field is wider than its declared type, it follows
  // different rules in all cases, except on AIX.
  // On AIX, wide bitfields follow the same rules as normal bitfields.
  if (FieldSize > StorageUnitSize && !isAIXLayout(Context)) {
    LayoutWideBitField(FieldSize, StorageUnitSize, FieldPacked, D);
    return;
  }

  // Compute the next available bit offset.
  uint64_t FieldOffset =
      IsUnion ? 0 : (getDataSizeInBits() - UnfilledBitsInLastUnit);

  // Handle targets that don't honor bitfield type alignment.
  if (!IsMsStruct && !Context.getTargetInfo().useBitFieldTypeAlignment()) {
    // Some such targets do honor it on zero-width bitfields.
    if (FieldSize == 0 &&
        Context.getTargetInfo().useZeroLengthBitfieldAlignment()) {
      // Some targets don't honor leading zero-width bitfields.
      if (!IsUnion && FieldOffset == 0 &&
          !Context.getTargetInfo().useLeadingZeroLengthBitfield())
        FieldAlign = 1;
      else {
        // The alignment to round up to is the max of the field's natural
        // alignment and a target-specific fixed value (sometimes zero).
        unsigned ZeroLengthBitfieldBoundary =
            Context.getTargetInfo().getZeroLengthBitfieldBoundary();
        FieldAlign = std::max(FieldAlign, ZeroLengthBitfieldBoundary);
      }
      // If that doesn't apply, just ignore the field alignment.
    } else {
      FieldAlign = 1;
    }
  }

  // Remember the alignment we would have used if the field were not packed.
  unsigned UnpackedFieldAlign = FieldAlign;
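  // For example, in
  //   struct P { char c; int b : 28; } __attribute__((packed));
  // packing drops the storage-unit alignment requirement: 'b' starts at bit
  // offset 8 and sizeof(P) == 5, whereas the unpacked layout starts 'b' at
  // bit offset 32 and sizeof(P) == 8.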
  // Ignore the field alignment if the field is packed unless it has zero
  // size.
  if (!IsMsStruct && FieldPacked && FieldSize != 0)
    FieldAlign = 1;

  // But, if there's an 'aligned' attribute on the field, honor that.
  unsigned ExplicitFieldAlign = D->getMaxAlignment();
  if (ExplicitFieldAlign) {
    FieldAlign = std::max(FieldAlign, ExplicitFieldAlign);
    UnpackedFieldAlign = std::max(UnpackedFieldAlign, ExplicitFieldAlign);
  }

  // But, if there's a #pragma pack in play, that takes precedence over
  // even the 'aligned' attribute, for non-zero-width bitfields.
  unsigned MaxFieldAlignmentInBits = Context.toBits(MaxFieldAlignment);
  if (!MaxFieldAlignment.isZero() && FieldSize) {
    UnpackedFieldAlign = std::min(UnpackedFieldAlign, MaxFieldAlignmentInBits);
    if (FieldPacked)
      FieldAlign = UnpackedFieldAlign;
    else
      FieldAlign = std::min(FieldAlign, MaxFieldAlignmentInBits);
  }

  // But, ms_struct just ignores all of that in unions, even explicit
  // alignment attributes.
  if (IsMsStruct && IsUnion) {
    FieldAlign = UnpackedFieldAlign = 1;
  }

  // For purposes of diagnostics, we're going to simultaneously
  // compute the field offsets that we would have used if we weren't
  // adding any alignment padding or if the field weren't packed.
  uint64_t UnpaddedFieldOffset = FieldOffset;
  uint64_t UnpackedFieldOffset = FieldOffset;

  // Check if we need to add padding to fit the bitfield within an
  // allocation unit with the right size and alignment. The rules are
  // somewhat different here for ms_struct structs.
  if (IsMsStruct) {
    // If it's not a zero-width bitfield, and we can fit the bitfield
    // into the active storage unit (and we haven't already decided to
    // start a new storage unit), just do so, regardless of any other
    // consideration. Otherwise, round up to the right alignment.
    if (FieldSize == 0 || FieldSize > UnfilledBitsInLastUnit) {
      FieldOffset = llvm::alignTo(FieldOffset, FieldAlign);
      UnpackedFieldOffset =
          llvm::alignTo(UnpackedFieldOffset, UnpackedFieldAlign);
      UnfilledBitsInLastUnit = 0;
    }
  } else {
    // #pragma pack, with any value, suppresses the insertion of padding.
    bool AllowPadding = MaxFieldAlignment.isZero();

    // Compute the real offset.
    if (FieldSize == 0 ||
        (AllowPadding &&
         (FieldOffset & (FieldAlign - 1)) + FieldSize > StorageUnitSize)) {
      FieldOffset = llvm::alignTo(FieldOffset, FieldAlign);
    } else if (ExplicitFieldAlign &&
               (MaxFieldAlignmentInBits == 0 ||
                ExplicitFieldAlign <= MaxFieldAlignmentInBits) &&
               Context.getTargetInfo().useExplicitBitFieldAlignment()) {
      // TODO: figure out what needs to be done on targets that don't honor
      // bit-field type alignment, like the ARM APCS ABI.
      FieldOffset = llvm::alignTo(FieldOffset, ExplicitFieldAlign);
    }

    // Repeat the computation for diagnostic purposes.
    if (FieldSize == 0 ||
        (AllowPadding &&
         (UnpackedFieldOffset & (UnpackedFieldAlign - 1)) + FieldSize >
             StorageUnitSize))
      UnpackedFieldOffset =
          llvm::alignTo(UnpackedFieldOffset, UnpackedFieldAlign);
    else if (ExplicitFieldAlign &&
             (MaxFieldAlignmentInBits == 0 ||
              ExplicitFieldAlign <= MaxFieldAlignmentInBits) &&
             Context.getTargetInfo().useExplicitBitFieldAlignment())
      UnpackedFieldOffset =
          llvm::alignTo(UnpackedFieldOffset, ExplicitFieldAlign);
  }

  // If we're using external layout, give the external layout a chance
  // to override this information.
  if (UseExternalLayout)
    FieldOffset = updateExternalFieldOffset(D, FieldOffset);

  // Okay, place the bitfield at the calculated offset.
  FieldOffsets.push_back(FieldOffset);
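  // For example, with '#pragma pack(2)' in effect, the placement above
  // gives the pragma precedence over an explicit attribute: in
  //   struct S { char c; int b : 8 __attribute__((aligned(4))); };
  // the 'aligned' attribute is capped at 2 bytes, so 'b' is not pushed to
  // a 4-byte boundary and stays at bit offset 8.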
  // Bookkeeping:

  // Anonymous members don't affect the overall record alignment,
  // except on targets where they do.
  if (!IsMsStruct &&
      !Context.getTargetInfo().useZeroLengthBitfieldAlignment() &&
      !D->getIdentifier())
    FieldAlign = UnpackedFieldAlign = 1;

  // On AIX, zero-width bitfields pad out to the natural alignment boundary,
  // but do not increase the alignment beyond the MaxFieldAlignment, or 1
  // if packed.
  if (isAIXLayout(Context) && !FieldSize) {
    if (FieldPacked)
      FieldAlign = 1;
    if (!MaxFieldAlignment.isZero()) {
      UnpackedFieldAlign =
          std::min(UnpackedFieldAlign, MaxFieldAlignmentInBits);
      FieldAlign = std::min(FieldAlign, MaxFieldAlignmentInBits);
    }
  }

  // Diagnose differences in layout due to padding or packing.
  if (!UseExternalLayout)
    CheckFieldPadding(FieldOffset, UnpaddedFieldOffset, UnpackedFieldOffset,
                      UnpackedFieldAlign, FieldPacked, D);

  // Update DataSize to include the last byte containing (part of) the
  // bitfield.

  // For unions, this is just a max operation, as usual.
  if (IsUnion) {
    // For ms_struct, allocate the entire storage unit --- unless this
    // is a zero-width bitfield, in which case just use a size of 1.
    uint64_t RoundedFieldSize;
    if (IsMsStruct) {
      RoundedFieldSize = (FieldSize ? StorageUnitSize
                                    : Context.getTargetInfo().getCharWidth());

      // Otherwise, allocate just the number of bytes required to store
      // the bitfield.
    } else {
      RoundedFieldSize = roundUpSizeToCharAlignment(FieldSize, Context);
    }
    setDataSize(std::max(getDataSizeInBits(), RoundedFieldSize));

    // For non-zero-width bitfields in ms_struct structs, allocate a new
    // storage unit if necessary.
  } else if (IsMsStruct && FieldSize) {
    // We should have cleared UnfilledBitsInLastUnit in every case
    // where we changed storage units.
    if (!UnfilledBitsInLastUnit) {
      setDataSize(FieldOffset + StorageUnitSize);
      UnfilledBitsInLastUnit = StorageUnitSize;
    }
    UnfilledBitsInLastUnit -= FieldSize;
    LastBitfieldStorageUnitSize = StorageUnitSize;

    // Otherwise, bump the data size up to include the bitfield,
    // including padding up to char alignment, and then remember how
    // many bits we didn't use.
  } else {
    uint64_t NewSizeInBits = FieldOffset + FieldSize;
    uint64_t CharAlignment = Context.getTargetInfo().getCharAlign();
    setDataSize(llvm::alignTo(NewSizeInBits, CharAlignment));
    UnfilledBitsInLastUnit = getDataSizeInBits() - NewSizeInBits;

    // The only time we can get here for an ms_struct is if this is a
    // zero-width bitfield, which doesn't count as anything for the
    // purposes of unfilled bits.
    LastBitfieldStorageUnitSize = 0;
  }

  // Update the size.
  setSize(std::max(getSizeInBits(), getDataSizeInBits()));

  // Remember max struct/class alignment.
  UnadjustedAlignment =
      std::max(UnadjustedAlignment, Context.toCharUnitsFromBits(FieldAlign));
  UpdateAlignment(Context.toCharUnitsFromBits(FieldAlign),
                  Context.toCharUnitsFromBits(UnpackedFieldAlign));
}
void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
                                             bool InsertExtraPadding) {
  auto *FieldClass = D->getType()->getAsCXXRecordDecl();
  bool PotentiallyOverlapping = D->hasAttr<NoUniqueAddressAttr>() && FieldClass;
  bool IsOverlappingEmptyField =
      PotentiallyOverlapping && FieldClass->isEmpty();

  CharUnits FieldOffset =
      (IsUnion || IsOverlappingEmptyField) ? CharUnits::Zero() : getDataSize();

  const bool DefaultsToAIXPowerAlignment =
      Context.getTargetInfo().defaultsToAIXPowerAlignment();
  bool FoundFirstNonOverlappingEmptyFieldForAIX = false;
  if (DefaultsToAIXPowerAlignment && !HandledFirstNonOverlappingEmptyField) {
    assert(FieldOffset == CharUnits::Zero() &&
           "The first non-overlapping empty field should have been handled.");

    if (!IsOverlappingEmptyField) {
      FoundFirstNonOverlappingEmptyFieldForAIX = true;

      // We're going to handle the "first member" based on
      // `FoundFirstNonOverlappingEmptyFieldForAIX` during the current
      // invocation of this function; record it as handled for future
      // invocations (except for unions, because the current field does not
      // represent all "firsts").
      HandledFirstNonOverlappingEmptyField = !IsUnion;
    }
  }

  if (D->isBitField()) {
    LayoutBitField(D);
    return;
  }

  uint64_t UnpaddedFieldOffset = getDataSizeInBits() - UnfilledBitsInLastUnit;
  // Reset the unfilled bits.
  UnfilledBitsInLastUnit = 0;
  LastBitfieldStorageUnitSize = 0;

-  llvm::Triple Target = Context.getTargetInfo().getTriple();
-  bool FieldPacked = (Packed && (!FieldClass || FieldClass->isPOD() ||
-                                 Context.getLangOpts().getClangABICompat() <=
-                                     LangOptions::ClangABI::Ver13 ||
-                                 Target.isPS4() || Target.isOSDarwin())) ||
-                     D->hasAttr<PackedAttr>();
+  bool FieldPacked = Packed || D->hasAttr<PackedAttr>();

  AlignRequirementKind AlignRequirement = AlignRequirementKind::None;
  CharUnits FieldSize;
  CharUnits FieldAlign;
  // The amount of this class's dsize occupied by the field.
  // This is equal to FieldSize unless we're permitted to pack
  // into the field's tail padding.
  CharUnits EffectiveFieldSize;

  auto setDeclInfo = [&](bool IsIncompleteArrayType) {
    auto TI = Context.getTypeInfoInChars(D->getType());
    FieldAlign = TI.Align;
    // Flexible array members don't have any size, but they have to be
    // aligned appropriately for their element type.
    EffectiveFieldSize = FieldSize =
        IsIncompleteArrayType ? CharUnits::Zero() : TI.Width;
    AlignRequirement = TI.AlignRequirement;
  };

  if (D->getType()->isIncompleteArrayType()) {
    setDeclInfo(true /* IsIncompleteArrayType */);
  } else if (const ReferenceType *RT = D->getType()->getAs<ReferenceType>()) {
    unsigned AS = Context.getTargetAddressSpace(RT->getPointeeType());
    EffectiveFieldSize = FieldSize = Context.toCharUnitsFromBits(
        Context.getTargetInfo().getPointerWidth(AS));
    FieldAlign = Context.toCharUnitsFromBits(
        Context.getTargetInfo().getPointerAlign(AS));
  } else {
    setDeclInfo(false /* IsIncompleteArrayType */);

    // A potentially-overlapping field occupies its dsize or nvsize, whichever
    // is larger.
    if (PotentiallyOverlapping) {
      const ASTRecordLayout &Layout = Context.getASTRecordLayout(FieldClass);
      EffectiveFieldSize =
          std::max(Layout.getNonVirtualSize(), Layout.getDataSize());
    }
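    // For example, in
    //   struct Empty {};
    //   struct D { [[no_unique_address]] Empty e; int x; };
    // 'e' is a potentially-overlapping empty field: it is placed at offset 0
    // without reserving storage, so sizeof(D) == sizeof(int).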
    if (IsMsStruct) {
      // If MS bitfield layout is required, figure out what type is being
      // laid out and align the field to the width of that type.

      // Resolve all typedefs down to their base type and round up the field
      // alignment if necessary.
      QualType T = Context.getBaseElementType(D->getType());
      if (const BuiltinType *BTy = T->getAs<BuiltinType>()) {
        CharUnits TypeSize = Context.getTypeSizeInChars(BTy);

        if (!llvm::isPowerOf2_64(TypeSize.getQuantity())) {
          assert(
              !Context.getTargetInfo().getTriple().isWindowsMSVCEnvironment() &&
              "Non PowerOf2 size in MSVC mode");
          // Base types with sizes that aren't a power of two don't work
          // with the layout rules for MS structs. This isn't an issue in
          // MSVC itself since there are no such base data types there.
          // On e.g. x86_32 mingw and linux, long double is 12 bytes though.
          // Any structs involving that data type obviously can't be ABI
          // compatible with MSVC regardless of how it is laid out.

          // Since ms_struct can be mass enabled (via a pragma or via the
          // -mms-bitfields command line parameter), this can trigger for
          // structs that don't actually need MSVC compatibility, so we
          // need to be able to sidestep the ms_struct layout for these types.

          // Since the combination of -mms-bitfields together with structs
          // like max_align_t (which contains a long double) for mingw is
          // quite common (and GCC handles it silently), just handle it
          // silently there. For other targets that have ms_struct enabled
          // (most probably via a pragma or attribute), trigger a diagnostic
          // that defaults to an error.
          if (!Context.getTargetInfo().getTriple().isWindowsGNUEnvironment())
            Diag(D->getLocation(), diag::warn_npot_ms_struct);
        }
        if (TypeSize > FieldAlign &&
            llvm::isPowerOf2_64(TypeSize.getQuantity()))
          FieldAlign = TypeSize;
      }
    }
  }

  // When used as part of a typedef, or together with a 'packed' attribute,
  // the 'aligned' attribute can be used to decrease alignment. In that case,
  // it overrides any computed alignment we have, and there is no need to
  // upgrade the alignment.
  auto alignedAttrCanDecreaseAIXAlignment = [AlignRequirement, FieldPacked] {
    // Enum alignment sources can be safely ignored here, because this only
    // helps decide whether we need the AIX alignment upgrade, which only
    // applies to floating-point types.
    return AlignRequirement == AlignRequirementKind::RequiredByTypedef ||
           (AlignRequirement == AlignRequirementKind::RequiredByRecord &&
            FieldPacked);
  };

  // The AIX `power` alignment rules apply the natural alignment of the
  // "first member" if it is of a floating-point data type (or is an aggregate
  // whose recursively "first" member or element is such a type). The
  // alignment associated with these types for subsequent members use an
  // alignment value where the floating-point data type is considered to have
  // 4-byte alignment.
  //
  // For the purposes of the foregoing: vtable pointers, non-empty base
  // classes, and zero-width bit-fields count as prior members; members of
  // empty class types marked `no_unique_address` are not considered to be
  // prior members.
  CharUnits PreferredAlign = FieldAlign;
  if (DefaultsToAIXPowerAlignment && !alignedAttrCanDecreaseAIXAlignment() &&
      (FoundFirstNonOverlappingEmptyFieldForAIX || IsNaturalAlign)) {
    auto performBuiltinTypeAlignmentUpgrade = [&](const BuiltinType *BTy) {
      if (BTy->getKind() == BuiltinType::Double ||
          BTy->getKind() == BuiltinType::LongDouble) {
        assert(PreferredAlign == CharUnits::fromQuantity(4) &&
               "No need to upgrade the alignment value.");
        PreferredAlign = CharUnits::fromQuantity(8);
      }
    };

    const Type *BaseTy = D->getType()->getBaseElementTypeUnsafe();
    if (const ComplexType *CTy = BaseTy->getAs<ComplexType>()) {
      performBuiltinTypeAlignmentUpgrade(
          CTy->getElementType()->castAs<BuiltinType>());
    } else if (const BuiltinType *BTy = BaseTy->getAs<BuiltinType>()) {
      performBuiltinTypeAlignmentUpgrade(BTy);
    } else if (const RecordType *RT = BaseTy->getAs<RecordType>()) {
      const RecordDecl *RD = RT->getDecl();
      assert(RD && "Expected non-null RecordDecl.");
      const ASTRecordLayout &FieldRecord = Context.getASTRecordLayout(RD);
      PreferredAlign = FieldRecord.getPreferredAlignment();
    }
  }
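  // For example, under AIX 'power' alignment:
  //   struct F { double d; };        // 'd' is the first member, so F keeps
  //                                  // the 8-byte preferred alignment.
  //   struct G { int i; double d; }; // 'd' is not first, so it is placed
  //                                  // with 4-byte alignment at offset 4
  //                                  // and sizeof(G) == 12.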
  // The alignment the field would have if it were not packed. This is used
  // to check whether the 'packed' attribute was unnecessary (-Wpacked).
  CharUnits UnpackedFieldAlign =
      !DefaultsToAIXPowerAlignment ? FieldAlign : PreferredAlign;
  CharUnits UnpackedFieldOffset = FieldOffset;
  CharUnits OriginalFieldAlign = UnpackedFieldAlign;

  if (FieldPacked) {
    FieldAlign = CharUnits::One();
    PreferredAlign = CharUnits::One();
  }
  CharUnits MaxAlignmentInChars =
      Context.toCharUnitsFromBits(D->getMaxAlignment());
  FieldAlign = std::max(FieldAlign, MaxAlignmentInChars);
  PreferredAlign = std::max(PreferredAlign, MaxAlignmentInChars);
  UnpackedFieldAlign = std::max(UnpackedFieldAlign, MaxAlignmentInChars);

  // The maximum field alignment overrides the aligned attribute.
  if (!MaxFieldAlignment.isZero()) {
    FieldAlign = std::min(FieldAlign, MaxFieldAlignment);
    PreferredAlign = std::min(PreferredAlign, MaxFieldAlignment);
    UnpackedFieldAlign = std::min(UnpackedFieldAlign, MaxFieldAlignment);
  }

  CharUnits AlignTo =
      !DefaultsToAIXPowerAlignment ? FieldAlign : PreferredAlign;
  // Round up the current record size to the field's alignment boundary.
  FieldOffset = FieldOffset.alignTo(AlignTo);
  UnpackedFieldOffset = UnpackedFieldOffset.alignTo(UnpackedFieldAlign);

  if (UseExternalLayout) {
    FieldOffset = Context.toCharUnitsFromBits(
        updateExternalFieldOffset(D, Context.toBits(FieldOffset)));

    if (!IsUnion && EmptySubobjects) {
      // Record the fact that we're placing a field at this offset.
      bool Allowed = EmptySubobjects->CanPlaceFieldAtOffset(D, FieldOffset);
      (void)Allowed;
      assert(Allowed && "Externally-placed field cannot be placed here");
    }
  } else {
    if (!IsUnion && EmptySubobjects) {
      // Check if we can place the field at this offset.
      while (!EmptySubobjects->CanPlaceFieldAtOffset(D, FieldOffset)) {
        // We couldn't place the field at the offset. Try again at a new
        // offset. We try offset 0 (for an empty field) and then dsize(C)
        // onwards.
        if (FieldOffset == CharUnits::Zero() &&
            getDataSize() != CharUnits::Zero())
          FieldOffset = getDataSize().alignTo(AlignTo);
        else
          FieldOffset += AlignTo;
      }
    }
  }

  // Place this field at the current location.
  FieldOffsets.push_back(Context.toBits(FieldOffset));

  if (!UseExternalLayout)
    CheckFieldPadding(Context.toBits(FieldOffset), UnpaddedFieldOffset,
                      Context.toBits(UnpackedFieldOffset),
                      Context.toBits(UnpackedFieldAlign), FieldPacked, D);

  if (InsertExtraPadding) {
    CharUnits ASanAlignment = CharUnits::fromQuantity(8);
    CharUnits ExtraSizeForAsan = ASanAlignment;
    if (FieldSize % ASanAlignment)
      ExtraSizeForAsan +=
          ASanAlignment - CharUnits::fromQuantity(FieldSize % ASanAlignment);
    EffectiveFieldSize = FieldSize = FieldSize + ExtraSizeForAsan;
  }

  // Reserve space for this field.
  if (!IsOverlappingEmptyField) {
    uint64_t EffectiveFieldSizeInBits = Context.toBits(EffectiveFieldSize);
    if (IsUnion)
      setDataSize(std::max(getDataSizeInBits(), EffectiveFieldSizeInBits));
    else
      setDataSize(FieldOffset + EffectiveFieldSize);

    PaddedFieldSize = std::max(PaddedFieldSize, FieldOffset + FieldSize);
    setSize(std::max(getSizeInBits(), getDataSizeInBits()));
  } else {
    setSize(std::max(getSizeInBits(),
                     (uint64_t)Context.toBits(FieldOffset + FieldSize)));
  }

  // Remember max struct/class ABI-specified alignment.
  UnadjustedAlignment = std::max(UnadjustedAlignment, FieldAlign);
  UpdateAlignment(FieldAlign, UnpackedFieldAlign, PreferredAlign);

  // For checking the alignment of inner fields against
  // the alignment of its parent record.
  if (const RecordDecl *RD = D->getParent()) {
    // Check if packed attribute or pragma pack is present.
    if (RD->hasAttr<PackedAttr>() || !MaxFieldAlignment.isZero())
      if (FieldAlign < OriginalFieldAlign)
        if (D->getType()->isRecordType()) {
          // If the offset is not a multiple of the alignment of
          // the type, raise the warning.
          // TODO: Takes no account of the alignment of the outer struct.
          if (FieldOffset % OriginalFieldAlign != 0)
            Diag(D->getLocation(), diag::warn_unaligned_access)
                << Context.getTypeDeclType(RD) << D->getName() << D->getType();
        }
  }
}

void ItaniumRecordLayoutBuilder::FinishLayout(const NamedDecl *D) {
  // In C++, records cannot be of size 0.
  if (Context.getLangOpts().CPlusPlus && getSizeInBits() == 0) {
    if (const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(D)) {
      // For compatibility with GCC, a class (POD or non-POD) that is not
      // empty but still has size 0 (e.g. because all of its fields are
      // zero-length arrays) remains of size 0.
      if (RD->isEmpty())
        setSize(CharUnits::One());
    } else
      setSize(CharUnits::One());
  }

  // If we have any remaining field tail padding, include that in the overall
  // size.
  setSize(std::max(getSizeInBits(),
                   (uint64_t)Context.toBits(PaddedFieldSize)));

  // Finally, round the size of the record up to the alignment of the
  // record itself.
  uint64_t UnpaddedSize = getSizeInBits() - UnfilledBitsInLastUnit;
  uint64_t UnpackedSizeInBits =
      llvm::alignTo(getSizeInBits(), Context.toBits(UnpackedAlignment));

  uint64_t RoundedSize = llvm::alignTo(
      getSizeInBits(),
      Context.toBits(!Context.getTargetInfo().defaultsToAIXPowerAlignment()
                         ? Alignment
                         : PreferredAlignment));

  if (UseExternalLayout) {
    // If we're inferring alignment, and the external size is smaller than
    // our size after we've rounded up to alignment, conservatively set the
    // alignment to 1.
    if (InferAlignment && External.Size < RoundedSize) {
      Alignment = CharUnits::One();
      PreferredAlignment = CharUnits::One();
      InferAlignment = false;
    }
    setSize(External.Size);
    return;
  }

  // Set the size to the final size.
  setSize(RoundedSize);

  unsigned CharBitNum = Context.getTargetInfo().getCharWidth();
  if (const RecordDecl *RD = dyn_cast<RecordDecl>(D)) {
    // Warn if padding was introduced to the struct/class/union.
    if (getSizeInBits() > UnpaddedSize) {
      unsigned PadSize = getSizeInBits() - UnpaddedSize;
      bool InBits = true;
      if (PadSize % CharBitNum == 0) {
        PadSize = PadSize / CharBitNum;
        InBits = false;
      }
      Diag(RD->getLocation(), diag::warn_padded_struct_size)
          << Context.getTypeDeclType(RD) << PadSize
          << (InBits ? 1 : 0); // (byte|bit)
    }

    // Warn if we packed it unnecessarily, when the unpacked alignment is not
    // greater than the one after packing, the size in bits doesn't change and
    // the offset of each field is identical.
    if (Packed && UnpackedAlignment <= Alignment &&
        UnpackedSizeInBits == getSizeInBits() && !HasPackedField)
      Diag(D->getLocation(), diag::warn_unnecessary_packed)
          << Context.getTypeDeclType(RD);
  }
}
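// For example, under -Wpadded 'struct P { int i; char c; };' is diagnosed
// because 3 bytes of tail padding are added to round sizeof(P) up to 8,
// and under -Wpacked 'struct Q { char a, b; } __attribute__((packed));' is
// diagnosed because packing changes neither Q's size nor its alignment.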
void ItaniumRecordLayoutBuilder::UpdateAlignment(
    CharUnits NewAlignment, CharUnits UnpackedNewAlignment,
    CharUnits PreferredNewAlignment) {
  // The alignment is not modified when using 'mac68k' alignment or when
  // we have an externally-supplied layout that also provides overall
  // alignment.
  if (IsMac68kAlign || (UseExternalLayout && !InferAlignment))
    return;

  if (NewAlignment > Alignment) {
    assert(llvm::isPowerOf2_64(NewAlignment.getQuantity()) &&
           "Alignment not a power of 2");
    Alignment = NewAlignment;
  }

  if (UnpackedNewAlignment > UnpackedAlignment) {
    assert(llvm::isPowerOf2_64(UnpackedNewAlignment.getQuantity()) &&
           "Alignment not a power of 2");
    UnpackedAlignment = UnpackedNewAlignment;
  }

  if (PreferredNewAlignment > PreferredAlignment) {
    assert(llvm::isPowerOf2_64(PreferredNewAlignment.getQuantity()) &&
           "Alignment not a power of 2");
    PreferredAlignment = PreferredNewAlignment;
  }
}

uint64_t
ItaniumRecordLayoutBuilder::updateExternalFieldOffset(const FieldDecl *Field,
                                                      uint64_t ComputedOffset) {
  uint64_t ExternalFieldOffset = External.getExternalFieldOffset(Field);

  if (InferAlignment && ExternalFieldOffset < ComputedOffset) {
    // The externally-supplied field offset is before the field offset we
    // computed. Assume that the structure is packed.
    Alignment = CharUnits::One();
    PreferredAlignment = CharUnits::One();
    InferAlignment = false;
  }

  // Use the externally-supplied field offset.
  return ExternalFieldOffset;
}

/// Get diagnostic %select index for tag kind for
/// field padding diagnostic message.
/// WARNING: Indexes apply to particular diagnostics only!
///
/// \returns diagnostic %select index.
static unsigned getPaddingDiagFromTagKind(TagTypeKind Tag) {
  switch (Tag) {
  case TTK_Struct:
    return 0;
  case TTK_Interface:
    return 1;
  case TTK_Class:
    return 2;
  default:
    llvm_unreachable("Invalid tag kind for field padding diagnostic!");
  }
}

void ItaniumRecordLayoutBuilder::CheckFieldPadding(
    uint64_t Offset, uint64_t UnpaddedOffset, uint64_t UnpackedOffset,
    unsigned UnpackedAlign, bool isPacked, const FieldDecl *D) {
  // We let ObjC ivars through without warning; ObjC interfaces generally
  // are not used for padding tricks.
  if (isa<ObjCIvarDecl>(D))
    return;

  // Don't warn about structs created without a SourceLocation. This can
  // be done by clients of the AST, such as codegen.
  if (D->getLocation().isInvalid())
    return;

  unsigned CharBitNum = Context.getTargetInfo().getCharWidth();

  // Warn if padding was introduced to the struct/class.
  if (!IsUnion && Offset > UnpaddedOffset) {
    unsigned PadSize = Offset - UnpaddedOffset;
    bool InBits = true;
    if (PadSize % CharBitNum == 0) {
      PadSize = PadSize / CharBitNum;
      InBits = false;
    }
    if (D->getIdentifier())
      Diag(D->getLocation(), diag::warn_padded_struct_field)
          << getPaddingDiagFromTagKind(D->getParent()->getTagKind())
          << Context.getTypeDeclType(D->getParent()) << PadSize
          << (InBits ? 1 : 0) // (byte|bit)
          << D->getIdentifier();
    else
      Diag(D->getLocation(), diag::warn_padded_struct_anon_field)
          << getPaddingDiagFromTagKind(D->getParent()->getTagKind())
          << Context.getTypeDeclType(D->getParent()) << PadSize
          << (InBits ? 1 : 0); // (byte|bit)
  }
  if (isPacked && Offset != UnpackedOffset) {
    HasPackedField = true;
  }
}
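// For example, in
//   struct K { virtual void f(); virtual void g() {} };
// K::f is the key function chosen by computeKeyFunction below: it is the
// first non-pure, non-inline virtual member function, so K's vtable is
// emitted in the translation unit that defines K::f.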
static const CXXMethodDecl *computeKeyFunction(ASTContext &Context,
                                               const CXXRecordDecl *RD) {
  // If a class isn't polymorphic it doesn't have a key function.
  if (!RD->isPolymorphic())
    return nullptr;

  // A class that is not externally visible doesn't have a key function. (Or
  // at least, there's no point to assigning a key function to such a class;
  // this doesn't affect the ABI.)
  if (!RD->isExternallyVisible())
    return nullptr;

  // Template instantiations don't have key functions per Itanium C++ ABI
  // 5.2.6. Same behavior as GCC.
  TemplateSpecializationKind TSK = RD->getTemplateSpecializationKind();
  if (TSK == TSK_ImplicitInstantiation ||
      TSK == TSK_ExplicitInstantiationDeclaration ||
      TSK == TSK_ExplicitInstantiationDefinition)
    return nullptr;

  bool allowInlineFunctions =
      Context.getTargetInfo().getCXXABI().canKeyFunctionBeInline();

  for (const CXXMethodDecl *MD : RD->methods()) {
    if (!MD->isVirtual())
      continue;

    if (MD->isPure())
      continue;

    // Ignore implicit member functions, they are always marked as inline,
    // but they don't have a body until they're defined.
    if (MD->isImplicit())
      continue;

    if (MD->isInlineSpecified() || MD->isConstexpr())
      continue;

    if (MD->hasInlineBody())
      continue;

    // Ignore inline deleted or defaulted functions.
    if (!MD->isUserProvided())
      continue;

    // In certain ABIs, ignore functions with out-of-line inline definitions.
    if (!allowInlineFunctions) {
      const FunctionDecl *Def;
      if (MD->hasBody(Def) && Def->isInlineSpecified())
        continue;
    }

    if (Context.getLangOpts().CUDA) {
      // While the compiler may see the key method in this TU, during CUDA
      // compilation we should ignore methods that are not accessible on
      // this side of compilation.
      if (Context.getLangOpts().CUDAIsDevice) {
        // In device mode ignore methods without the __device__ attribute.
        if (!MD->hasAttr<CUDADeviceAttr>())
          continue;
      } else {
        // In host mode ignore __device__-only methods.
        if (!MD->hasAttr<CUDAHostAttr>() && MD->hasAttr<CUDADeviceAttr>())
          continue;
      }
    }

    // If the key function is dllimport but the class isn't, then the class
    // has no key function. The DLL that exports the key function won't
    // export the vtable in this case.
    if (MD->hasAttr<DLLImportAttr>() && !RD->hasAttr<DLLImportAttr>() &&
        !Context.getTargetInfo().hasPS4DLLImportExport())
      return nullptr;

    // We found it.
    return MD;
  }

  return nullptr;
}

DiagnosticBuilder ItaniumRecordLayoutBuilder::Diag(SourceLocation Loc,
                                                   unsigned DiagID) {
  return Context.getDiagnostics().Report(Loc, DiagID);
}

/// Does the target C++ ABI require us to skip over the tail-padding
/// of the given class (considering it as a base class) when allocating
/// objects?
static bool mustSkipTailPadding(TargetCXXABI ABI, const CXXRecordDecl *RD) {
  switch (ABI.getTailPaddingUseRules()) {
  case TargetCXXABI::AlwaysUseTailPadding:
    return false;

  case TargetCXXABI::UseTailPaddingUnlessPOD03:
    // FIXME: To the extent that this is meant to cover the Itanium ABI
    // rules, we should implement the restrictions about over-sized
    // bitfields:
    //
    // http://itanium-cxx-abi.github.io/cxx-abi/abi.html#POD :
    //   In general, a type is considered a POD for the purposes of
    //   layout if it is a POD type (in the sense of ISO C++
    //   [basic.types]). However, a POD-struct or POD-union (in the
    //   sense of ISO C++ [class]) with a bitfield member whose
    //   declared width is wider than the declared type of the
    //   bitfield is not a POD for the purpose of layout. Similarly,
    //   an array type is not a POD for the purpose of layout if the
    //   element type of the array is not a POD for the purpose of
    //   layout.
    //
    //   Where references to the ISO C++ are made in this paragraph,
    //   the Technical Corrigendum 1 version of the standard is
    //   intended.
    return RD->isPOD();

  case TargetCXXABI::UseTailPaddingUnlessPOD11:
    // This is equivalent to RD->getTypeForDecl().isCXX11PODType(),
    // but with a lot of abstraction penalty stripped off. This does
    // assume that these properties are set correctly even in C++98
    // mode; fortunately, that is true because we want to assign
    // consistent semantics to the type-traits intrinsics (or at
    // least as many of them as possible).
    return RD->isTrivial() && RD->isCXX11StandardLayout();
  }

  llvm_unreachable("bad tail-padding use kind");
}

static bool isMsLayout(const ASTContext &Context) {
  return Context.getTargetInfo().getCXXABI().isMicrosoft();
}

// This section contains an implementation of struct layout that is, up to the
// included tests, compatible with cl.exe (2013). The layout produced is
// significantly different from that produced by the Itanium ABI. Here we note
// the most important differences.
//
// * The alignment of bitfields in unions is ignored when computing the
//   alignment of the union.
// * The existence of zero-width bitfields that occur after anything other
//   than a non-zero length bitfield is ignored.
// * There is no explicit primary base for the purposes of layout. All bases
//   with vfptrs are laid out first, followed by all bases without vfptrs.
// * The Itanium equivalent vtable pointers are split into a vfptr (virtual
//   function pointer) and a vbptr (virtual base pointer). They can each be
//   shared with non-virtual bases. These bases need not be the same. vfptrs
//   always occur at offset 0. vbptrs can occur at an arbitrary offset and are
//   placed after the lexicographically last non-virtual base. This placement
//   is always before fields but can be in the middle of the non-virtual bases
//   due to the two-pass layout scheme for non-virtual bases.
// * Virtual bases sometimes require a 'vtordisp' field that is laid out
//   before the virtual base and is used in conjunction with virtual overrides
//   during construction and destruction. This is always a 4 byte value and is
//   used as an alternative to constructor vtables.
// * vtordisps are allocated in a block of memory with size and alignment
//   equal to the alignment of the completed structure (before applying
//   __declspec(align())). The vtordisps always occur at the end of the
//   allocation block, immediately prior to the virtual base.
// * vfptrs are injected after all bases and fields have been laid out. In
//   order to guarantee proper alignment of all fields, the vfptr injection
//   pushes all bases and fields back by the alignment imposed by those bases
//   and fields. This can potentially add a significant amount of padding.
//   vfptrs are always injected at offset 0.
// * vbptrs are injected after all bases and fields have been laid out. In
//   order to guarantee proper alignment of all fields, the vbptr injection
//   pushes all bases and fields back by the alignment imposed by those bases
//   and fields. This can potentially add a significant amount of padding.
//   vbptrs are injected immediately after the last non-virtual base as
//   lexicographically ordered in the code. If this site isn't pointer aligned
//   the vbptr is placed at the next properly aligned location. Enough padding
//   is added to guarantee a fit.
// * The last zero sized non-virtual base can be placed at the end of the
//   struct (potentially aliasing another object), or may alias with the first
//   field, even if they are of the same type.
// * The last zero size virtual base may be placed at the end of the struct
//   potentially aliasing another object.
// * The ABI attempts to avoid aliasing of zero sized bases by adding padding
//   between bases or vbases with specific properties.
//   The criteria for additional padding between two bases is that the first
//   base is zero sized or ends with a zero sized subobject and the second
//   base is zero sized or trails with a zero sized base or field (sharing of
//   vfptrs can reorder the layout of the bases so the leading base is not
//   always the first one declared). This rule does take into account fields
//   that are not records, so padding will occur even if the last field is,
//   e.g. an int. The padding added for bases is 1 byte. The padding added
//   between vbases depends on the alignment of the object but is at least 4
//   bytes (in both 32 and 64 bit modes).
// * There is no concept of non-virtual alignment; non-virtual alignment and
//   alignment are always identical.
// * There is a distinction between alignment and required alignment.
//   __declspec(align) changes the required alignment of a struct. This
//   alignment is _always_ obeyed, even in the presence of #pragma pack. A
//   record inherits required alignment from all of its fields and bases.
// * __declspec(align) on bitfields has the effect of changing the bitfield's
//   alignment instead of its required alignment. This is the only known way
//   to make the alignment of a struct bigger than 8. Interestingly enough
//   this alignment is also immune to the effects of #pragma pack and can be
//   used to create structures with large alignment under #pragma pack.
//   However, because it does not impact required alignment, such a structure,
//   when used as a field or base, will not be aligned if #pragma pack is
//   still active at the time of use.
//
// Known incompatibilities:
// * all: #pragma pack between fields in a record
// * 2010 and back: If the last field in a record is a bitfield, every object
//   laid out after the record will have extra padding inserted before it.
//   The extra padding will have size equal to the size of the storage class
//   of the bitfield. 0 sized bitfields don't exhibit this behavior and the
//   extra padding can be avoided by adding a 0 sized bitfield after the
//   non-zero-sized bitfield.
// * 2012 and back: In 64-bit mode, if the alignment of a record is 16 or
//   greater due to __declspec(align()) then a second layout phase occurs
//   after the locations of the vf and vb pointers are known. This layout
//   phase suffers from the "last field is a bitfield" bug in 2010 and results
//   in _every_ field getting padding put in front of it, potentially
//   including the vfptr, leaving the vfptr at a non-zero location which
//   results in a fault if anything tries to read the vftbl. The second layout
//   phase also treats bitfields as separate entities and gives them each
//   storage rather than packing them. Additionally, because this phase
//   appears to perform an (unstable) sort on the members before laying them
//   out and because merged bitfields have the same address, the bitfields end
//   up in whatever order the sort left them in, a behavior we could never
//   hope to replicate.
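// For example, the two-pass base layout is observable in
//   struct A { int a; };
//   struct B { virtual void f(); };
//   struct C : A, B {};
// where the Microsoft ABI lays out B (which has a vfptr) before A even
// though A is declared first, so C's vfptr lands at offset 0.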
namespace {
struct MicrosoftRecordLayoutBuilder {
  struct ElementInfo {
    CharUnits Size;
    CharUnits Alignment;
  };
  typedef llvm::DenseMap<const CXXRecordDecl *, CharUnits> BaseOffsetsMapTy;
  MicrosoftRecordLayoutBuilder(const ASTContext &Context) : Context(Context) {}

private:
  MicrosoftRecordLayoutBuilder(const MicrosoftRecordLayoutBuilder &) = delete;
  void operator=(const MicrosoftRecordLayoutBuilder &) = delete;

public:
  void layout(const RecordDecl *RD);
  void cxxLayout(const CXXRecordDecl *RD);
  /// Initializes size and alignment and honors some flags.
  void initializeLayout(const RecordDecl *RD);
  /// Initializes C++ layout, computes alignment and virtual alignment and
  /// the existence of vfptrs and vbptrs. Alignment is needed before the
  /// vfptr is laid out.
  void initializeCXXLayout(const CXXRecordDecl *RD);
  void layoutNonVirtualBases(const CXXRecordDecl *RD);
  void layoutNonVirtualBase(const CXXRecordDecl *RD,
                            const CXXRecordDecl *BaseDecl,
                            const ASTRecordLayout &BaseLayout,
                            const ASTRecordLayout *&PreviousBaseLayout);
  void injectVFPtr(const CXXRecordDecl *RD);
  void injectVBPtr(const CXXRecordDecl *RD);
  /// Lays out the fields of the record. Also rounds size up to
  /// alignment.
  void layoutFields(const RecordDecl *RD);
  void layoutField(const FieldDecl *FD);
  void layoutBitField(const FieldDecl *FD);
  /// Lays out a single zero-width bit-field in the record and handles
  /// special cases associated with zero-width bit-fields.
  void layoutZeroWidthBitField(const FieldDecl *FD);
  void layoutVirtualBases(const CXXRecordDecl *RD);
  void finalizeLayout(const RecordDecl *RD);
  /// Gets the size and alignment of a base taking pragma pack and
  /// __declspec(align) into account.
  ElementInfo getAdjustedElementInfo(const ASTRecordLayout &Layout);
  /// Gets the size and alignment of a field taking pragma pack and
  /// __declspec(align) into account. It also updates RequiredAlignment as a
  /// side effect because it is most convenient to do so here.
  ElementInfo getAdjustedElementInfo(const FieldDecl *FD);
  /// Places a field at an offset in CharUnits.
  void placeFieldAtOffset(CharUnits FieldOffset) {
    FieldOffsets.push_back(Context.toBits(FieldOffset));
  }
  /// Places a bitfield at a bit offset.
  void placeFieldAtBitOffset(uint64_t FieldOffset) {
    FieldOffsets.push_back(FieldOffset);
  }
  /// Compute the set of virtual bases for which vtordisps are required.
  void computeVtorDispSet(
      llvm::SmallPtrSetImpl<const CXXRecordDecl *> &HasVtorDispSet,
      const CXXRecordDecl *RD) const;
  const ASTContext &Context;
  /// The size of the record being laid out.
  CharUnits Size;
  /// The non-virtual size of the record layout.
  CharUnits NonVirtualSize;
  /// The data size of the record layout.
  CharUnits DataSize;
  /// The current alignment of the record layout.
  CharUnits Alignment;
  /// The maximum allowed field alignment. This is set by #pragma pack.
  CharUnits MaxFieldAlignment;
  /// The alignment that this record must obey. This is imposed by
  /// __declspec(align()) on the record itself or one of its fields or bases.
  CharUnits RequiredAlignment;
  /// The size of the allocation of the currently active bitfield.
  /// This value isn't meaningful unless LastFieldIsNonZeroWidthBitfield
  /// is true.
  CharUnits CurrentBitfieldSize;
  /// Offset to the virtual base table pointer (if one exists).
  CharUnits VBPtrOffset;
  /// Minimum record size possible.
  CharUnits MinEmptyStructSize;
  /// The size and alignment info of a pointer.
  ElementInfo PointerInfo;
  /// The primary base class (if one exists).
  const CXXRecordDecl *PrimaryBase;
  /// The class we share our vb-pointer with.
  const CXXRecordDecl *SharedVBPtrBase;
  /// The collection of field offsets.
  SmallVector<uint64_t, 16> FieldOffsets;
  /// Base classes and their offsets in the record.
  BaseOffsetsMapTy Bases;
  /// virtual base classes and their offsets in the record.
  ASTRecordLayout::VBaseOffsetsMapTy VBases;
  /// The number of remaining bits in our last bitfield allocation.
  /// This value isn't meaningful unless LastFieldIsNonZeroWidthBitfield is
  /// true.
  unsigned RemainingBitsInField;
  bool IsUnion : 1;
  /// True if the last field laid out was a bitfield and was not 0
  /// width.
bool LastFieldIsNonZeroWidthBitfield : 1; /// True if the class has its own vftable pointer. bool HasOwnVFPtr : 1; /// True if the class has a vbtable pointer. bool HasVBPtr : 1; /// True if the last sub-object within the type is zero sized or the /// object itself is zero sized. This *does not* count members that are not /// records. Only used for MS-ABI. bool EndsWithZeroSizedObject : 1; /// True if this class is zero sized or first base is zero sized or /// has this property. Only used for MS-ABI. bool LeadsWithZeroSizedBase : 1; /// True if the external AST source provided a layout for this record. bool UseExternalLayout : 1; /// The layout provided by the external AST source. Only active if /// UseExternalLayout is true. ExternalLayout External; }; } // namespace MicrosoftRecordLayoutBuilder::ElementInfo MicrosoftRecordLayoutBuilder::getAdjustedElementInfo( const ASTRecordLayout &Layout) { ElementInfo Info; Info.Alignment = Layout.getAlignment(); // Respect pragma pack. if (!MaxFieldAlignment.isZero()) Info.Alignment = std::min(Info.Alignment, MaxFieldAlignment); // Track zero-sized subobjects here where it's already available. EndsWithZeroSizedObject = Layout.endsWithZeroSizedObject(); // Respect required alignment, this is necessary because we may have adjusted // the alignment in the case of pragma pack. Note that the required alignment // doesn't actually apply to the struct alignment at this point. Alignment = std::max(Alignment, Info.Alignment); RequiredAlignment = std::max(RequiredAlignment, Layout.getRequiredAlignment()); Info.Alignment = std::max(Info.Alignment, Layout.getRequiredAlignment()); Info.Size = Layout.getNonVirtualSize(); return Info; } MicrosoftRecordLayoutBuilder::ElementInfo MicrosoftRecordLayoutBuilder::getAdjustedElementInfo( const FieldDecl *FD) { // Get the alignment of the field type's natural alignment, ignore any // alignment attributes. auto TInfo = Context.getTypeInfoInChars(FD->getType()->getUnqualifiedDesugaredType()); ElementInfo Info{TInfo.Width, TInfo.Align}; // Respect align attributes on the field. CharUnits FieldRequiredAlignment = Context.toCharUnitsFromBits(FD->getMaxAlignment()); // Respect align attributes on the type. if (Context.isAlignmentRequired(FD->getType())) FieldRequiredAlignment = std::max( Context.getTypeAlignInChars(FD->getType()), FieldRequiredAlignment); // Respect attributes applied to subobjects of the field. if (FD->isBitField()) // For some reason __declspec align impacts alignment rather than required // alignment when it is applied to bitfields. Info.Alignment = std::max(Info.Alignment, FieldRequiredAlignment); else { if (auto RT = FD->getType()->getBaseElementTypeUnsafe()->getAs()) { auto const &Layout = Context.getASTRecordLayout(RT->getDecl()); EndsWithZeroSizedObject = Layout.endsWithZeroSizedObject(); FieldRequiredAlignment = std::max(FieldRequiredAlignment, Layout.getRequiredAlignment()); } // Capture required alignment as a side-effect. RequiredAlignment = std::max(RequiredAlignment, FieldRequiredAlignment); } // Respect pragma pack, attribute pack and declspec align if (!MaxFieldAlignment.isZero()) Info.Alignment = std::min(Info.Alignment, MaxFieldAlignment); if (FD->hasAttr()) Info.Alignment = CharUnits::One(); Info.Alignment = std::max(Info.Alignment, FieldRequiredAlignment); return Info; } void MicrosoftRecordLayoutBuilder::layout(const RecordDecl *RD) { // For C record layout, zero-sized records always have size 4. 
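  // For illustration (hypothetical input, not from the original comments):
  // a record with no members, e.g.
  //   struct Empty {};
  // gets size 4 from this C path (via MinEmptyStructSize below), while the
  // C++ path in cxxLayout() starts from the C++ rule that empty classes
  // have size 1.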
MinEmptyStructSize = CharUnits::fromQuantity(4); initializeLayout(RD); layoutFields(RD); DataSize = Size = Size.alignTo(Alignment); RequiredAlignment = std::max( RequiredAlignment, Context.toCharUnitsFromBits(RD->getMaxAlignment())); finalizeLayout(RD); } void MicrosoftRecordLayoutBuilder::cxxLayout(const CXXRecordDecl *RD) { // The C++ standard says that empty structs have size 1. MinEmptyStructSize = CharUnits::One(); initializeLayout(RD); initializeCXXLayout(RD); layoutNonVirtualBases(RD); layoutFields(RD); injectVBPtr(RD); injectVFPtr(RD); if (HasOwnVFPtr || (HasVBPtr && !SharedVBPtrBase)) Alignment = std::max(Alignment, PointerInfo.Alignment); auto RoundingAlignment = Alignment; if (!MaxFieldAlignment.isZero()) RoundingAlignment = std::min(RoundingAlignment, MaxFieldAlignment); if (!UseExternalLayout) Size = Size.alignTo(RoundingAlignment); NonVirtualSize = Size; RequiredAlignment = std::max( RequiredAlignment, Context.toCharUnitsFromBits(RD->getMaxAlignment())); layoutVirtualBases(RD); finalizeLayout(RD); } void MicrosoftRecordLayoutBuilder::initializeLayout(const RecordDecl *RD) { IsUnion = RD->isUnion(); Size = CharUnits::Zero(); Alignment = CharUnits::One(); // In 64-bit mode we always perform an alignment step after laying out vbases. // In 32-bit mode we do not. The check to see if we need to perform alignment // checks the RequiredAlignment field and performs alignment if it isn't 0. RequiredAlignment = Context.getTargetInfo().getTriple().isArch64Bit() ? CharUnits::One() : CharUnits::Zero(); // Compute the maximum field alignment. MaxFieldAlignment = CharUnits::Zero(); // Honor the default struct packing maximum alignment flag. if (unsigned DefaultMaxFieldAlignment = Context.getLangOpts().PackStruct) MaxFieldAlignment = CharUnits::fromQuantity(DefaultMaxFieldAlignment); // Honor the packing attribute. The MS-ABI ignores pragma pack if its larger // than the pointer size. if (const MaxFieldAlignmentAttr *MFAA = RD->getAttr()){ unsigned PackedAlignment = MFAA->getAlignment(); if (PackedAlignment <= Context.getTargetInfo().getPointerWidth(0)) MaxFieldAlignment = Context.toCharUnitsFromBits(PackedAlignment); } // Packed attribute forces max field alignment to be 1. if (RD->hasAttr()) MaxFieldAlignment = CharUnits::One(); // Try to respect the external layout if present. UseExternalLayout = false; if (ExternalASTSource *Source = Context.getExternalSource()) UseExternalLayout = Source->layoutRecordType( RD, External.Size, External.Align, External.FieldOffsets, External.BaseOffsets, External.VirtualBaseOffsets); } void MicrosoftRecordLayoutBuilder::initializeCXXLayout(const CXXRecordDecl *RD) { EndsWithZeroSizedObject = false; LeadsWithZeroSizedBase = false; HasOwnVFPtr = false; HasVBPtr = false; PrimaryBase = nullptr; SharedVBPtrBase = nullptr; // Calculate pointer size and alignment. These are used for vfptr and vbprt // injection. PointerInfo.Size = Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerWidth(0)); PointerInfo.Alignment = Context.toCharUnitsFromBits(Context.getTargetInfo().getPointerAlign(0)); // Respect pragma pack. if (!MaxFieldAlignment.isZero()) PointerInfo.Alignment = std::min(PointerInfo.Alignment, MaxFieldAlignment); } void MicrosoftRecordLayoutBuilder::layoutNonVirtualBases(const CXXRecordDecl *RD) { // The MS-ABI lays out all bases that contain leading vfptrs before it lays // out any bases that do not contain vfptrs. We implement this as two passes // over the bases. This approach guarantees that the primary base is laid out // first. 
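  // For example (an illustrative case, not from the original comment): given
  //   struct A { int a; };
  //   struct B { virtual void f(); };
  //   struct C : A, B {};
  // B is laid out at offset 0 of C because it carries a vfptr, even though A
  // is declared first, and B becomes the primary base.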
We use these passes to calculate some additional aggregated // information about the bases, such as required alignment and the presence of // zero sized members. const ASTRecordLayout *PreviousBaseLayout = nullptr; bool HasPolymorphicBaseClass = false; // Iterate through the bases and lay out the non-virtual ones. for (const CXXBaseSpecifier &Base : RD->bases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); HasPolymorphicBaseClass |= BaseDecl->isPolymorphic(); const ASTRecordLayout &BaseLayout = Context.getASTRecordLayout(BaseDecl); // Mark and skip virtual bases. if (Base.isVirtual()) { HasVBPtr = true; continue; } // Check for a base to share a VBPtr with. if (!SharedVBPtrBase && BaseLayout.hasVBPtr()) { SharedVBPtrBase = BaseDecl; HasVBPtr = true; } // Only lay out bases with extendable VFPtrs on the first pass. if (!BaseLayout.hasExtendableVFPtr()) continue; // If we don't have a primary base, this one qualifies. if (!PrimaryBase) { PrimaryBase = BaseDecl; LeadsWithZeroSizedBase = BaseLayout.leadsWithZeroSizedBase(); } // Lay out the base. layoutNonVirtualBase(RD, BaseDecl, BaseLayout, PreviousBaseLayout); } // Figure out if we need a fresh VFPtr for this class. if (RD->isPolymorphic()) { if (!HasPolymorphicBaseClass) // This class introduces polymorphism, so we need a vftable to store the // RTTI information. HasOwnVFPtr = true; else if (!PrimaryBase) { // We have a polymorphic base class but can't extend its vftable. Add a // new vfptr if we would use any vftable slots. for (CXXMethodDecl *M : RD->methods()) { if (MicrosoftVTableContext::hasVtableSlot(M) && M->size_overridden_methods() == 0) { HasOwnVFPtr = true; break; } } } } // If we don't have a primary base then we have a leading object that could // itself lead with a zero-sized object, something we track. bool CheckLeadingLayout = !PrimaryBase; // Iterate through the bases and lay out the non-virtual ones. for (const CXXBaseSpecifier &Base : RD->bases()) { if (Base.isVirtual()) continue; const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); const ASTRecordLayout &BaseLayout = Context.getASTRecordLayout(BaseDecl); // Only lay out bases without extendable VFPtrs on the second pass. if (BaseLayout.hasExtendableVFPtr()) { VBPtrOffset = Bases[BaseDecl] + BaseLayout.getNonVirtualSize(); continue; } // If this is the first layout, check to see if it leads with a zero sized // object. If it does, so do we. if (CheckLeadingLayout) { CheckLeadingLayout = false; LeadsWithZeroSizedBase = BaseLayout.leadsWithZeroSizedBase(); } // Lay out the base. layoutNonVirtualBase(RD, BaseDecl, BaseLayout, PreviousBaseLayout); VBPtrOffset = Bases[BaseDecl] + BaseLayout.getNonVirtualSize(); } // Set our VBPtroffset if we know it at this point. if (!HasVBPtr) VBPtrOffset = CharUnits::fromQuantity(-1); else if (SharedVBPtrBase) { const ASTRecordLayout &Layout = Context.getASTRecordLayout(SharedVBPtrBase); VBPtrOffset = Bases[SharedVBPtrBase] + Layout.getVBPtrOffset(); } } static bool recordUsesEBO(const RecordDecl *RD) { if (!isa(RD)) return false; if (RD->hasAttr()) return true; if (auto *LVA = RD->getAttr()) // TODO: Double check with the next version of MSVC. if (LVA->getVersion() <= LangOptions::MSVC2015) return false; // TODO: Some later version of MSVC will change the default behavior of the // compiler to enable EBO by default. When this happens, we will need an // additional isCompatibleWithMSVC check. 
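  // Illustrative note (an assumption based on the attribute checked above):
  // the EBO attribute corresponds to MSVC's __declspec(empty_bases), e.g.
  //   struct __declspec(empty_bases) D : Empty1, Empty2 { int x; };
  // which lets both empty bases share offset 0 instead of being laid out at
  // distinct offsets.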
return false; } void MicrosoftRecordLayoutBuilder::layoutNonVirtualBase( const CXXRecordDecl *RD, const CXXRecordDecl *BaseDecl, const ASTRecordLayout &BaseLayout, const ASTRecordLayout *&PreviousBaseLayout) { // Insert padding between two bases if the left first one is zero sized or // contains a zero sized subobject and the right is zero sized or one leads // with a zero sized base. bool MDCUsesEBO = recordUsesEBO(RD); if (PreviousBaseLayout && PreviousBaseLayout->endsWithZeroSizedObject() && BaseLayout.leadsWithZeroSizedBase() && !MDCUsesEBO) Size++; ElementInfo Info = getAdjustedElementInfo(BaseLayout); CharUnits BaseOffset; // Respect the external AST source base offset, if present. bool FoundBase = false; if (UseExternalLayout) { FoundBase = External.getExternalNVBaseOffset(BaseDecl, BaseOffset); if (FoundBase) { assert(BaseOffset >= Size && "base offset already allocated"); Size = BaseOffset; } } if (!FoundBase) { if (MDCUsesEBO && BaseDecl->isEmpty()) { assert(BaseLayout.getNonVirtualSize() == CharUnits::Zero()); BaseOffset = CharUnits::Zero(); } else { // Otherwise, lay the base out at the end of the MDC. BaseOffset = Size = Size.alignTo(Info.Alignment); } } Bases.insert(std::make_pair(BaseDecl, BaseOffset)); Size += BaseLayout.getNonVirtualSize(); PreviousBaseLayout = &BaseLayout; } void MicrosoftRecordLayoutBuilder::layoutFields(const RecordDecl *RD) { LastFieldIsNonZeroWidthBitfield = false; for (const FieldDecl *Field : RD->fields()) layoutField(Field); } void MicrosoftRecordLayoutBuilder::layoutField(const FieldDecl *FD) { if (FD->isBitField()) { layoutBitField(FD); return; } LastFieldIsNonZeroWidthBitfield = false; ElementInfo Info = getAdjustedElementInfo(FD); Alignment = std::max(Alignment, Info.Alignment); CharUnits FieldOffset; if (UseExternalLayout) FieldOffset = Context.toCharUnitsFromBits(External.getExternalFieldOffset(FD)); else if (IsUnion) FieldOffset = CharUnits::Zero(); else FieldOffset = Size.alignTo(Info.Alignment); placeFieldAtOffset(FieldOffset); Size = std::max(Size, FieldOffset + Info.Size); } void MicrosoftRecordLayoutBuilder::layoutBitField(const FieldDecl *FD) { unsigned Width = FD->getBitWidthValue(Context); if (Width == 0) { layoutZeroWidthBitField(FD); return; } ElementInfo Info = getAdjustedElementInfo(FD); // Clamp the bitfield to a containable size for the sake of being able // to lay them out. Sema will throw an error. if (Width > Context.toBits(Info.Size)) Width = Context.toBits(Info.Size); // Check to see if this bitfield fits into an existing allocation. Note: // MSVC refuses to pack bitfields of formal types with different sizes // into the same allocation. if (!UseExternalLayout && !IsUnion && LastFieldIsNonZeroWidthBitfield && CurrentBitfieldSize == Info.Size && Width <= RemainingBitsInField) { placeFieldAtBitOffset(Context.toBits(Size) - RemainingBitsInField); RemainingBitsInField -= Width; return; } LastFieldIsNonZeroWidthBitfield = true; CurrentBitfieldSize = Info.Size; if (UseExternalLayout) { auto FieldBitOffset = External.getExternalFieldOffset(FD); placeFieldAtBitOffset(FieldBitOffset); auto NewSize = Context.toCharUnitsFromBits( llvm::alignDown(FieldBitOffset, Context.toBits(Info.Alignment)) + Context.toBits(Info.Size)); Size = std::max(Size, NewSize); Alignment = std::max(Alignment, Info.Alignment); } else if (IsUnion) { placeFieldAtOffset(CharUnits::Zero()); Size = std::max(Size, Info.Size); // TODO: Add a Sema warning that MS ignores bitfield alignment in unions. 
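  // e.g. (illustrative): in
  //   union U { __declspec(align(8)) int b : 3; };
  // the bit-field's alignment (boosted to 8 by __declspec(align)) is not
  // propagated to U itself.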
} else { // Allocate a new block of memory and place the bitfield in it. CharUnits FieldOffset = Size.alignTo(Info.Alignment); placeFieldAtOffset(FieldOffset); Size = FieldOffset + Info.Size; Alignment = std::max(Alignment, Info.Alignment); RemainingBitsInField = Context.toBits(Info.Size) - Width; } } void MicrosoftRecordLayoutBuilder::layoutZeroWidthBitField(const FieldDecl *FD) { // Zero-width bitfields are ignored unless they follow a non-zero-width // bitfield. if (!LastFieldIsNonZeroWidthBitfield) { placeFieldAtOffset(IsUnion ? CharUnits::Zero() : Size); // TODO: Add a Sema warning that MS ignores alignment for zero // sized bitfields that occur after zero-size bitfields or non-bitfields. return; } LastFieldIsNonZeroWidthBitfield = false; ElementInfo Info = getAdjustedElementInfo(FD); if (IsUnion) { placeFieldAtOffset(CharUnits::Zero()); Size = std::max(Size, Info.Size); // TODO: Add a Sema warning that MS ignores bitfield alignment in unions. } else { // Round up the current record size to the field's alignment boundary. CharUnits FieldOffset = Size.alignTo(Info.Alignment); placeFieldAtOffset(FieldOffset); Size = FieldOffset; Alignment = std::max(Alignment, Info.Alignment); } } void MicrosoftRecordLayoutBuilder::injectVBPtr(const CXXRecordDecl *RD) { if (!HasVBPtr || SharedVBPtrBase) return; // Inject the VBPointer at the injection site. CharUnits InjectionSite = VBPtrOffset; // But before we do, make sure it's properly aligned. VBPtrOffset = VBPtrOffset.alignTo(PointerInfo.Alignment); // Determine where the first field should be laid out after the vbptr. CharUnits FieldStart = VBPtrOffset + PointerInfo.Size; // Shift everything after the vbptr down, unless we're using an external // layout. if (UseExternalLayout) { // It is possible that there were no fields or bases located after vbptr, // so the size was not adjusted before. if (Size < FieldStart) Size = FieldStart; return; } // Make sure that the amount we push the fields back by is a multiple of the // alignment. CharUnits Offset = (FieldStart - InjectionSite) .alignTo(std::max(RequiredAlignment, Alignment)); Size += Offset; for (uint64_t &FieldOffset : FieldOffsets) FieldOffset += Context.toBits(Offset); for (BaseOffsetsMapTy::value_type &Base : Bases) if (Base.second >= InjectionSite) Base.second += Offset; } void MicrosoftRecordLayoutBuilder::injectVFPtr(const CXXRecordDecl *RD) { if (!HasOwnVFPtr) return; // Make sure that the amount we push the struct back by is a multiple of the // alignment. CharUnits Offset = PointerInfo.Size.alignTo(std::max(RequiredAlignment, Alignment)); // Push back the vbptr, but increase the size of the object and push back // regular fields by the offset only if not using external record layout. if (HasVBPtr) VBPtrOffset += Offset; if (UseExternalLayout) { // The class may have no bases or fields, but still have a vfptr // (e.g. it's an interface class). The size was not correctly set before // in this case. if (FieldOffsets.empty() && Bases.empty()) Size += Offset; return; } Size += Offset; // If we're using an external layout, the fields offsets have already // accounted for this adjustment. 
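  // Worked example (illustrative): with 8-byte pointers and Alignment == 8
  // (RequiredAlignment no larger), Offset above is 8, so every field and
  // base slides back by 8 bytes to make room for the vfptr at offset zero.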
for (uint64_t &FieldOffset : FieldOffsets) FieldOffset += Context.toBits(Offset); for (BaseOffsetsMapTy::value_type &Base : Bases) Base.second += Offset; } void MicrosoftRecordLayoutBuilder::layoutVirtualBases(const CXXRecordDecl *RD) { if (!HasVBPtr) return; // Vtordisps are always 4 bytes (even in 64-bit mode) CharUnits VtorDispSize = CharUnits::fromQuantity(4); CharUnits VtorDispAlignment = VtorDispSize; // vtordisps respect pragma pack. if (!MaxFieldAlignment.isZero()) VtorDispAlignment = std::min(VtorDispAlignment, MaxFieldAlignment); // The alignment of the vtordisp is at least the required alignment of the // entire record. This requirement may be present to support vtordisp // injection. for (const CXXBaseSpecifier &VBase : RD->vbases()) { const CXXRecordDecl *BaseDecl = VBase.getType()->getAsCXXRecordDecl(); const ASTRecordLayout &BaseLayout = Context.getASTRecordLayout(BaseDecl); RequiredAlignment = std::max(RequiredAlignment, BaseLayout.getRequiredAlignment()); } VtorDispAlignment = std::max(VtorDispAlignment, RequiredAlignment); // Compute the vtordisp set. llvm::SmallPtrSet HasVtorDispSet; computeVtorDispSet(HasVtorDispSet, RD); // Iterate through the virtual bases and lay them out. const ASTRecordLayout *PreviousBaseLayout = nullptr; for (const CXXBaseSpecifier &VBase : RD->vbases()) { const CXXRecordDecl *BaseDecl = VBase.getType()->getAsCXXRecordDecl(); const ASTRecordLayout &BaseLayout = Context.getASTRecordLayout(BaseDecl); bool HasVtordisp = HasVtorDispSet.contains(BaseDecl); // Insert padding between two bases if the left first one is zero sized or // contains a zero sized subobject and the right is zero sized or one leads // with a zero sized base. The padding between virtual bases is 4 // bytes (in both 32 and 64 bits modes) and always involves rounding up to // the required alignment, we don't know why. if ((PreviousBaseLayout && PreviousBaseLayout->endsWithZeroSizedObject() && BaseLayout.leadsWithZeroSizedBase() && !recordUsesEBO(RD)) || HasVtordisp) { Size = Size.alignTo(VtorDispAlignment) + VtorDispSize; Alignment = std::max(VtorDispAlignment, Alignment); } // Insert the virtual base. ElementInfo Info = getAdjustedElementInfo(BaseLayout); CharUnits BaseOffset; // Respect the external AST source base offset, if present. if (UseExternalLayout) { if (!External.getExternalVBaseOffset(BaseDecl, BaseOffset)) BaseOffset = Size; } else BaseOffset = Size.alignTo(Info.Alignment); assert(BaseOffset >= Size && "base offset already allocated"); VBases.insert(std::make_pair(BaseDecl, ASTRecordLayout::VBaseInfo(BaseOffset, HasVtordisp))); Size = BaseOffset + BaseLayout.getNonVirtualSize(); PreviousBaseLayout = &BaseLayout; } } void MicrosoftRecordLayoutBuilder::finalizeLayout(const RecordDecl *RD) { // Respect required alignment. Note that in 32-bit mode Required alignment // may be 0 and cause size not to be updated. DataSize = Size; if (!RequiredAlignment.isZero()) { Alignment = std::max(Alignment, RequiredAlignment); auto RoundingAlignment = Alignment; if (!MaxFieldAlignment.isZero()) RoundingAlignment = std::min(RoundingAlignment, MaxFieldAlignment); RoundingAlignment = std::max(RoundingAlignment, RequiredAlignment); Size = Size.alignTo(RoundingAlignment); } if (Size.isZero()) { if (!recordUsesEBO(RD) || !cast(RD)->isEmpty()) { EndsWithZeroSizedObject = true; LeadsWithZeroSizedBase = true; } // Zero-sized structures have size equal to their alignment if a // __declspec(align) came into play. 
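  // e.g. (illustrative): 'struct __declspec(align(16)) S {};' takes the
  // branch below and finalizes with Size == Alignment == 16, while a plain
  // empty struct falls back to MinEmptyStructSize.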
if (RequiredAlignment >= MinEmptyStructSize) Size = Alignment; else Size = MinEmptyStructSize; } if (UseExternalLayout) { Size = Context.toCharUnitsFromBits(External.Size); if (External.Align) Alignment = Context.toCharUnitsFromBits(External.Align); } } // Recursively walks the non-virtual bases of a class and determines if any of // them are in the bases with overridden methods set. static bool RequiresVtordisp(const llvm::SmallPtrSetImpl & BasesWithOverriddenMethods, const CXXRecordDecl *RD) { if (BasesWithOverriddenMethods.count(RD)) return true; // If any of a virtual bases non-virtual bases (recursively) requires a // vtordisp than so does this virtual base. for (const CXXBaseSpecifier &Base : RD->bases()) if (!Base.isVirtual() && RequiresVtordisp(BasesWithOverriddenMethods, Base.getType()->getAsCXXRecordDecl())) return true; return false; } void MicrosoftRecordLayoutBuilder::computeVtorDispSet( llvm::SmallPtrSetImpl &HasVtordispSet, const CXXRecordDecl *RD) const { // /vd2 or #pragma vtordisp(2): Always use vtordisps for virtual bases with // vftables. if (RD->getMSVtorDispMode() == MSVtorDispMode::ForVFTable) { for (const CXXBaseSpecifier &Base : RD->vbases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(BaseDecl); if (Layout.hasExtendableVFPtr()) HasVtordispSet.insert(BaseDecl); } return; } // If any of our bases need a vtordisp for this type, so do we. Check our // direct bases for vtordisp requirements. for (const CXXBaseSpecifier &Base : RD->bases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); const ASTRecordLayout &Layout = Context.getASTRecordLayout(BaseDecl); for (const auto &bi : Layout.getVBaseOffsetsMap()) if (bi.second.hasVtorDisp()) HasVtordispSet.insert(bi.first); } // We don't introduce any additional vtordisps if either: // * A user declared constructor or destructor aren't declared. // * #pragma vtordisp(0) or the /vd0 flag are in use. if ((!RD->hasUserDeclaredConstructor() && !RD->hasUserDeclaredDestructor()) || RD->getMSVtorDispMode() == MSVtorDispMode::Never) return; // /vd1 or #pragma vtordisp(1): Try to guess based on whether we think it's // possible for a partially constructed object with virtual base overrides to // escape a non-trivial constructor. assert(RD->getMSVtorDispMode() == MSVtorDispMode::ForVBaseOverride); // Compute a set of base classes which define methods we override. A virtual // base in this set will require a vtordisp. A virtual base that transitively // contains one of these bases as a non-virtual base will also require a // vtordisp. llvm::SmallPtrSet Work; llvm::SmallPtrSet BasesWithOverriddenMethods; // Seed the working set with our non-destructor, non-pure virtual methods. for (const CXXMethodDecl *MD : RD->methods()) if (MicrosoftVTableContext::hasVtableSlot(MD) && !isa(MD) && !MD->isPure()) Work.insert(MD); while (!Work.empty()) { const CXXMethodDecl *MD = *Work.begin(); auto MethodRange = MD->overridden_methods(); // If a virtual method has no-overrides it lives in its parent's vtable. if (MethodRange.begin() == MethodRange.end()) BasesWithOverriddenMethods.insert(MD->getParent()); else Work.insert(MethodRange.begin(), MethodRange.end()); // We've finished processing this element, remove it from the working set. Work.erase(MD); } // For each of our virtual bases, check if it is in the set of overridden // bases or if it transitively contains a non-virtual base that is. 
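  // e.g. (illustrative): under the default /vd1, given
  //   struct A { virtual void f(); };
  //   struct B : virtual A { void f() override; B(); };
  // B::f overrides A::f, so A ends up in BasesWithOverriddenMethods and the
  // loop below assigns B's virtual base A a vtordisp.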
for (const CXXBaseSpecifier &Base : RD->vbases()) {
    const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl();
    if (!HasVtordispSet.count(BaseDecl) &&
        RequiresVtordisp(BasesWithOverriddenMethods, BaseDecl))
      HasVtordispSet.insert(BaseDecl);
  }
}

/// getASTRecordLayout - Get or compute information about the layout of the
/// specified record (struct/union/class), which indicates its size and field
/// position information.
const ASTRecordLayout &
ASTContext::getASTRecordLayout(const RecordDecl *D) const {
  // These asserts test different things. A record has a definition
  // as soon as we begin to parse the definition. That definition is
  // not a complete definition (which is what isDefinition() tests)
  // until we *finish* parsing the definition.

  if (D->hasExternalLexicalStorage() && !D->getDefinition())
    getExternalSource()->CompleteType(const_cast<RecordDecl *>(D));

  D = D->getDefinition();
  assert(D && "Cannot get layout of forward declarations!");
  assert(!D->isInvalidDecl() && "Cannot get layout of invalid decl!");
  assert(D->isCompleteDefinition() && "Cannot layout type before complete!");

  // Look up this layout, if already laid out, return what we have.
  // Note that we can't save a reference to the entry because this function
  // is recursive.
  const ASTRecordLayout *Entry = ASTRecordLayouts[D];
  if (Entry) return *Entry;

  const ASTRecordLayout *NewEntry = nullptr;

  if (isMsLayout(*this)) {
    MicrosoftRecordLayoutBuilder Builder(*this);
    if (const auto *RD = dyn_cast<CXXRecordDecl>(D)) {
      Builder.cxxLayout(RD);
      NewEntry = new (*this) ASTRecordLayout(
          *this, Builder.Size, Builder.Alignment, Builder.Alignment,
          Builder.Alignment, Builder.RequiredAlignment, Builder.HasOwnVFPtr,
          Builder.HasOwnVFPtr || Builder.PrimaryBase, Builder.VBPtrOffset,
          Builder.DataSize, Builder.FieldOffsets, Builder.NonVirtualSize,
          Builder.Alignment, Builder.Alignment, CharUnits::Zero(),
          Builder.PrimaryBase, false, Builder.SharedVBPtrBase,
          Builder.EndsWithZeroSizedObject, Builder.LeadsWithZeroSizedBase,
          Builder.Bases, Builder.VBases);
    } else {
      Builder.layout(D);
      NewEntry = new (*this) ASTRecordLayout(
          *this, Builder.Size, Builder.Alignment, Builder.Alignment,
          Builder.Alignment, Builder.RequiredAlignment, Builder.Size,
          Builder.FieldOffsets);
    }
  } else {
    if (const auto *RD = dyn_cast<CXXRecordDecl>(D)) {
      EmptySubobjectMap EmptySubobjects(*this, RD);
      ItaniumRecordLayoutBuilder Builder(*this, &EmptySubobjects);
      Builder.Layout(RD);

      // In certain situations, we are allowed to lay out objects in the
      // tail-padding of base classes. This is ABI-dependent.
      // FIXME: this should be stored in the record layout.
      bool skipTailPadding =
          mustSkipTailPadding(getTargetInfo().getCXXABI(), RD);

      // FIXME: This should be done in FinalizeLayout.
      CharUnits DataSize =
          skipTailPadding ? Builder.getSize() : Builder.getDataSize();
      CharUnits NonVirtualSize =
          skipTailPadding ?
DataSize : Builder.NonVirtualSize; NewEntry = new (*this) ASTRecordLayout( *this, Builder.getSize(), Builder.Alignment, Builder.PreferredAlignment, Builder.UnadjustedAlignment, /*RequiredAlignment : used by MS-ABI)*/ Builder.Alignment, Builder.HasOwnVFPtr, RD->isDynamicClass(), CharUnits::fromQuantity(-1), DataSize, Builder.FieldOffsets, NonVirtualSize, Builder.NonVirtualAlignment, Builder.PreferredNVAlignment, EmptySubobjects.SizeOfLargestEmptySubobject, Builder.PrimaryBase, Builder.PrimaryBaseIsVirtual, nullptr, false, false, Builder.Bases, Builder.VBases); } else { ItaniumRecordLayoutBuilder Builder(*this, /*EmptySubobjects=*/nullptr); Builder.Layout(D); NewEntry = new (*this) ASTRecordLayout( *this, Builder.getSize(), Builder.Alignment, Builder.PreferredAlignment, Builder.UnadjustedAlignment, /*RequiredAlignment : used by MS-ABI)*/ Builder.Alignment, Builder.getSize(), Builder.FieldOffsets); } } ASTRecordLayouts[D] = NewEntry; if (getLangOpts().DumpRecordLayouts) { llvm::outs() << "\n*** Dumping AST Record Layout\n"; DumpRecordLayout(D, llvm::outs(), getLangOpts().DumpRecordLayoutsSimple); } return *NewEntry; } const CXXMethodDecl *ASTContext::getCurrentKeyFunction(const CXXRecordDecl *RD) { if (!getTargetInfo().getCXXABI().hasKeyFunctions()) return nullptr; assert(RD->getDefinition() && "Cannot get key function for forward decl!"); RD = RD->getDefinition(); // Beware: // 1) computing the key function might trigger deserialization, which might // invalidate iterators into KeyFunctions // 2) 'get' on the LazyDeclPtr might also trigger deserialization and // invalidate the LazyDeclPtr within the map itself LazyDeclPtr Entry = KeyFunctions[RD]; const Decl *Result = Entry ? Entry.get(getExternalSource()) : computeKeyFunction(*this, RD); // Store it back if it changed. if (Entry.isOffset() || Entry.isValid() != bool(Result)) KeyFunctions[RD] = const_cast(Result); return cast_or_null(Result); } void ASTContext::setNonKeyFunction(const CXXMethodDecl *Method) { assert(Method == Method->getFirstDecl() && "not working with method declaration from class definition"); // Look up the cache entry. Since we're working with the first // declaration, its parent must be the class definition, which is // the correct key for the KeyFunctions hash. const auto &Map = KeyFunctions; auto I = Map.find(Method->getParent()); // If it's not cached, there's nothing to do. if (I == Map.end()) return; // If it is cached, check whether it's the target method, and if so, // remove it from the cache. Note, the call to 'get' might invalidate // the iterator and the LazyDeclPtr object within the map. LazyDeclPtr Ptr = I->second; if (Ptr.get(getExternalSource()) == Method) { // FIXME: remember that we did this for module / chained PCH state? 
KeyFunctions.erase(Method->getParent()); } } static uint64_t getFieldOffset(const ASTContext &C, const FieldDecl *FD) { const ASTRecordLayout &Layout = C.getASTRecordLayout(FD->getParent()); return Layout.getFieldOffset(FD->getFieldIndex()); } uint64_t ASTContext::getFieldOffset(const ValueDecl *VD) const { uint64_t OffsetInBits; if (const FieldDecl *FD = dyn_cast(VD)) { OffsetInBits = ::getFieldOffset(*this, FD); } else { const IndirectFieldDecl *IFD = cast(VD); OffsetInBits = 0; for (const NamedDecl *ND : IFD->chain()) OffsetInBits += ::getFieldOffset(*this, cast(ND)); } return OffsetInBits; } uint64_t ASTContext::lookupFieldBitOffset(const ObjCInterfaceDecl *OID, const ObjCImplementationDecl *ID, const ObjCIvarDecl *Ivar) const { Ivar = Ivar->getCanonicalDecl(); const ObjCInterfaceDecl *Container = Ivar->getContainingInterface(); // FIXME: We should eliminate the need to have ObjCImplementationDecl passed // in here; it should never be necessary because that should be the lexical // decl context for the ivar. // If we know have an implementation (and the ivar is in it) then // look up in the implementation layout. const ASTRecordLayout *RL; if (ID && declaresSameEntity(ID->getClassInterface(), Container)) RL = &getASTObjCImplementationLayout(ID); else RL = &getASTObjCInterfaceLayout(Container); // Compute field index. // // FIXME: The index here is closely tied to how ASTContext::getObjCLayout is // implemented. This should be fixed to get the information from the layout // directly. unsigned Index = 0; for (const ObjCIvarDecl *IVD = Container->all_declared_ivar_begin(); IVD; IVD = IVD->getNextIvar()) { if (Ivar == IVD) break; ++Index; } assert(Index < RL->getFieldCount() && "Ivar is not inside record layout!"); return RL->getFieldOffset(Index); } /// getObjCLayout - Get or compute information about the layout of the /// given interface. /// /// \param Impl - If given, also include the layout of the interface's /// implementation. This may differ by including synthesized ivars. const ASTRecordLayout & ASTContext::getObjCLayout(const ObjCInterfaceDecl *D, const ObjCImplementationDecl *Impl) const { // Retrieve the definition if (D->hasExternalLexicalStorage() && !D->getDefinition()) getExternalSource()->CompleteType(const_cast(D)); D = D->getDefinition(); assert(D && !D->isInvalidDecl() && D->isThisDeclarationADefinition() && "Invalid interface decl!"); // Look up this layout, if already laid out, return what we have. const ObjCContainerDecl *Key = Impl ? (const ObjCContainerDecl*) Impl : (const ObjCContainerDecl*) D; if (const ASTRecordLayout *Entry = ObjCLayouts[Key]) return *Entry; // Add in synthesized ivar count if laying out an implementation. if (Impl) { unsigned SynthCount = CountNonClassIvars(D); // If there aren't any synthesized ivars then reuse the interface // entry. Note we can't cache this because we simply free all // entries later; however we shouldn't look up implementations // frequently. 
if (SynthCount == 0) return getObjCLayout(D, nullptr); } ItaniumRecordLayoutBuilder Builder(*this, /*EmptySubobjects=*/nullptr); Builder.Layout(D); const ASTRecordLayout *NewEntry = new (*this) ASTRecordLayout( *this, Builder.getSize(), Builder.Alignment, Builder.PreferredAlignment, Builder.UnadjustedAlignment, /*RequiredAlignment : used by MS-ABI)*/ Builder.Alignment, Builder.getDataSize(), Builder.FieldOffsets); ObjCLayouts[Key] = NewEntry; return *NewEntry; } static void PrintOffset(raw_ostream &OS, CharUnits Offset, unsigned IndentLevel) { OS << llvm::format("%10" PRId64 " | ", (int64_t)Offset.getQuantity()); OS.indent(IndentLevel * 2); } static void PrintBitFieldOffset(raw_ostream &OS, CharUnits Offset, unsigned Begin, unsigned Width, unsigned IndentLevel) { llvm::SmallString<10> Buffer; { llvm::raw_svector_ostream BufferOS(Buffer); BufferOS << Offset.getQuantity() << ':'; if (Width == 0) { BufferOS << '-'; } else { BufferOS << Begin << '-' << (Begin + Width - 1); } } OS << llvm::right_justify(Buffer, 10) << " | "; OS.indent(IndentLevel * 2); } static void PrintIndentNoOffset(raw_ostream &OS, unsigned IndentLevel) { OS << " | "; OS.indent(IndentLevel * 2); } static void DumpRecordLayout(raw_ostream &OS, const RecordDecl *RD, const ASTContext &C, CharUnits Offset, unsigned IndentLevel, const char* Description, bool PrintSizeInfo, bool IncludeVirtualBases) { const ASTRecordLayout &Layout = C.getASTRecordLayout(RD); auto CXXRD = dyn_cast(RD); PrintOffset(OS, Offset, IndentLevel); OS << C.getTypeDeclType(const_cast(RD)).getAsString(); if (Description) OS << ' ' << Description; if (CXXRD && CXXRD->isEmpty()) OS << " (empty)"; OS << '\n'; IndentLevel++; // Dump bases. if (CXXRD) { const CXXRecordDecl *PrimaryBase = Layout.getPrimaryBase(); bool HasOwnVFPtr = Layout.hasOwnVFPtr(); bool HasOwnVBPtr = Layout.hasOwnVBPtr(); // Vtable pointer. if (CXXRD->isDynamicClass() && !PrimaryBase && !isMsLayout(C)) { PrintOffset(OS, Offset, IndentLevel); OS << '(' << *RD << " vtable pointer)\n"; } else if (HasOwnVFPtr) { PrintOffset(OS, Offset, IndentLevel); // vfptr (for Microsoft C++ ABI) OS << '(' << *RD << " vftable pointer)\n"; } // Collect nvbases. SmallVector Bases; for (const CXXBaseSpecifier &Base : CXXRD->bases()) { assert(!Base.getType()->isDependentType() && "Cannot layout class with dependent bases."); if (!Base.isVirtual()) Bases.push_back(Base.getType()->getAsCXXRecordDecl()); } // Sort nvbases by offset. llvm::stable_sort( Bases, [&](const CXXRecordDecl *L, const CXXRecordDecl *R) { return Layout.getBaseClassOffset(L) < Layout.getBaseClassOffset(R); }); // Dump (non-virtual) bases for (const CXXRecordDecl *Base : Bases) { CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(Base); DumpRecordLayout(OS, Base, C, BaseOffset, IndentLevel, Base == PrimaryBase ? "(primary base)" : "(base)", /*PrintSizeInfo=*/false, /*IncludeVirtualBases=*/false); } // vbptr (for Microsoft C++ ABI) if (HasOwnVBPtr) { PrintOffset(OS, Offset + Layout.getVBPtrOffset(), IndentLevel); OS << '(' << *RD << " vbtable pointer)\n"; } } // Dump fields. uint64_t FieldNo = 0; for (RecordDecl::field_iterator I = RD->field_begin(), E = RD->field_end(); I != E; ++I, ++FieldNo) { const FieldDecl &Field = **I; uint64_t LocalFieldOffsetInBits = Layout.getFieldOffset(FieldNo); CharUnits FieldOffset = Offset + C.toCharUnitsFromBits(LocalFieldOffsetInBits); // Recursively dump fields of record type. 
if (auto RT = Field.getType()->getAs()) { DumpRecordLayout(OS, RT->getDecl(), C, FieldOffset, IndentLevel, Field.getName().data(), /*PrintSizeInfo=*/false, /*IncludeVirtualBases=*/true); continue; } if (Field.isBitField()) { uint64_t LocalFieldByteOffsetInBits = C.toBits(FieldOffset - Offset); unsigned Begin = LocalFieldOffsetInBits - LocalFieldByteOffsetInBits; unsigned Width = Field.getBitWidthValue(C); PrintBitFieldOffset(OS, FieldOffset, Begin, Width, IndentLevel); } else { PrintOffset(OS, FieldOffset, IndentLevel); } const QualType &FieldType = C.getLangOpts().DumpRecordLayoutsCanonical ? Field.getType().getCanonicalType() : Field.getType(); OS << FieldType.getAsString() << ' ' << Field << '\n'; } // Dump virtual bases. if (CXXRD && IncludeVirtualBases) { const ASTRecordLayout::VBaseOffsetsMapTy &VtorDisps = Layout.getVBaseOffsetsMap(); for (const CXXBaseSpecifier &Base : CXXRD->vbases()) { assert(Base.isVirtual() && "Found non-virtual class!"); const CXXRecordDecl *VBase = Base.getType()->getAsCXXRecordDecl(); CharUnits VBaseOffset = Offset + Layout.getVBaseClassOffset(VBase); if (VtorDisps.find(VBase)->second.hasVtorDisp()) { PrintOffset(OS, VBaseOffset - CharUnits::fromQuantity(4), IndentLevel); OS << "(vtordisp for vbase " << *VBase << ")\n"; } DumpRecordLayout(OS, VBase, C, VBaseOffset, IndentLevel, VBase == Layout.getPrimaryBase() ? "(primary virtual base)" : "(virtual base)", /*PrintSizeInfo=*/false, /*IncludeVirtualBases=*/false); } } if (!PrintSizeInfo) return; PrintIndentNoOffset(OS, IndentLevel - 1); OS << "[sizeof=" << Layout.getSize().getQuantity(); if (CXXRD && !isMsLayout(C)) OS << ", dsize=" << Layout.getDataSize().getQuantity(); OS << ", align=" << Layout.getAlignment().getQuantity(); if (C.getTargetInfo().defaultsToAIXPowerAlignment()) OS << ", preferredalign=" << Layout.getPreferredAlignment().getQuantity(); if (CXXRD) { OS << ",\n"; PrintIndentNoOffset(OS, IndentLevel - 1); OS << " nvsize=" << Layout.getNonVirtualSize().getQuantity(); OS << ", nvalign=" << Layout.getNonVirtualAlignment().getQuantity(); if (C.getTargetInfo().defaultsToAIXPowerAlignment()) OS << ", preferrednvalign=" << Layout.getPreferredNVAlignment().getQuantity(); } OS << "]\n"; } void ASTContext::DumpRecordLayout(const RecordDecl *RD, raw_ostream &OS, bool Simple) const { if (!Simple) { ::DumpRecordLayout(OS, RD, *this, CharUnits(), 0, nullptr, /*PrintSizeInfo*/ true, /*IncludeVirtualBases=*/true); return; } // The "simple" format is designed to be parsed by the // layout-override testing code. There shouldn't be any external // uses of this format --- when LLDB overrides a layout, it sets up // the data structures directly --- so feel free to adjust this as // you like as long as you also update the rudimentary parser for it // in libFrontend. 
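  // A rough sketch of the simple format (field values invented for
  // illustration; the exact shape is whatever the code below prints):
  //
  //   Layout: <ASTRecordLayout
  //     Size:64
  //     DataSize:64
  //     Alignment:32
  //     FieldOffsets: [0, 32]>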
const ASTRecordLayout &Info = getASTRecordLayout(RD);
  OS << "Type: " << getTypeDeclType(RD).getAsString() << "\n";
  OS << "\nLayout: ";
  OS << "<ASTRecordLayout\n";
  OS << "  Size:" << toBits(Info.getSize()) << "\n";
  if (!isMsLayout(*this))
    OS << "  DataSize:" << toBits(Info.getDataSize()) << "\n";
  OS << "  Alignment:" << toBits(Info.getAlignment()) << "\n";
  if (Target->defaultsToAIXPowerAlignment())
    OS << "  PreferredAlignment:" << toBits(Info.getPreferredAlignment())
       << "\n";
  OS << "  FieldOffsets: [";
  for (unsigned i = 0, e = Info.getFieldCount(); i != e; ++i) {
    if (i)
      OS << ", ";
    OS << Info.getFieldOffset(i);
  }
  OS << "]>\n";
}

diff --git a/clang/lib/Driver/ToolChains/PPCLinux.cpp b/clang/lib/Driver/ToolChains/PPCLinux.cpp
index e480d8bd8703..2fea262fd109 100644
--- a/clang/lib/Driver/ToolChains/PPCLinux.cpp
+++ b/clang/lib/Driver/ToolChains/PPCLinux.cpp
@@ -1,85 +1,87 @@
//===-- PPCLinux.cpp - PowerPC ToolChain Implementations --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "PPCLinux.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"

using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace llvm::opt;
using namespace llvm::sys;

// Glibc older than 2.32 doesn't fully support IEEE float128. Here we check
// the glibc version by looking at the dynamic linker name.
static bool GlibcSupportsFloat128(const std::string &Linker) {
  llvm::SmallVector<char, 16> Path;
  // Resolve potential symlinks to the linker.
  if (fs::real_path(Linker, Path))
    return false;
  llvm::StringRef LinkerName =
      path::filename(llvm::StringRef(Path.data(), Path.size()));

  // Since glibc 2.34, the installed .so file is not a symlink anymore. But we
  // can still safely assume it's newer than 2.32.
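  // For illustration (hypothetical names, reasoning from the checks below):
  // "ld64.so.2" is accepted outright; "ld-2.31.so" parses Minor == 31 and is
  // rejected; "ld-2.33.so" parses Minor == 33 and passes.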
if (LinkerName.startswith("ld64.so")) return true; if (!LinkerName.startswith("ld-2.")) return false; unsigned Minor = (LinkerName[5] - '0') * 10 + (LinkerName[6] - '0'); if (Minor < 32) return false; return true; } PPCLinuxToolChain::PPCLinuxToolChain(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args) : Linux(D, Triple, Args) { if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { StringRef ABIName = A->getValue(); if (ABIName == "ieeelongdouble" && !SupportIEEEFloat128(D, Triple, Args)) D.Diag(diag::warn_drv_unsupported_float_abi_by_lib) << ABIName; } } void PPCLinuxToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { if (!DriverArgs.hasArg(clang::driver::options::OPT_nostdinc) && !DriverArgs.hasArg(options::OPT_nobuiltininc)) { const Driver &D = getDriver(); SmallString<128> P(D.ResourceDir); llvm::sys::path::append(P, "include", "ppc_wrappers"); addSystemInclude(DriverArgs, CC1Args, P); } Linux::AddClangSystemIncludeArgs(DriverArgs, CC1Args); } bool PPCLinuxToolChain::SupportIEEEFloat128( const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args) const { if (!Triple.isLittleEndian() || !Triple.isPPC64()) return false; if (Args.hasArg(options::OPT_nostdlib, options::OPT_nostdlibxx)) return true; + CXXStdlibType StdLib = ToolChain::GetCXXStdlibType(Args); bool HasUnsupportedCXXLib = - ToolChain::GetCXXStdlibType(Args) == CST_Libcxx && - GCCInstallation.getVersion().isOlderThan(12, 1, 0); + StdLib == CST_Libcxx || + (StdLib == CST_Libstdcxx && + GCCInstallation.getVersion().isOlderThan(12, 1, 0)); return GlibcSupportsFloat128(Linux::getDynamicLinker(Args)) && !(D.CCCIsCXX() && HasUnsupportedCXXLib); } diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 553a0b31c0ab..7f1ce3da7e7e 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1,4705 +1,4701 @@ //===- CompilerInvocation.cpp ---------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang/Frontend/CompilerInvocation.h" #include "TestModuleFileExtension.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/CommentOptions.h" #include "clang/Basic/DebugInfoOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/FileSystemOptions.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/LangStandard.h" #include "clang/Basic/ObjCRuntime.h" #include "clang/Basic/Sanitizers.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/TargetOptions.h" #include "clang/Basic/Version.h" #include "clang/Basic/Visibility.h" #include "clang/Basic/XRayInstr.h" #include "clang/Config/config.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" #include "clang/Frontend/CommandLineSourceLoc.h" #include "clang/Frontend/DependencyOutputOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/FrontendOptions.h" #include "clang/Frontend/FrontendPluginRegistry.h" #include "clang/Frontend/MigratorOptions.h" #include "clang/Frontend/PreprocessorOutputOptions.h" #include "clang/Frontend/TextDiagnosticBuffer.h" #include "clang/Frontend/Utils.h" #include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/PreprocessorOptions.h" #include "clang/Sema/CodeCompleteOptions.h" #include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ModuleFileExtension.h" #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/Linker/Linker.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/OptSpecifier.h" #include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/Remarks/HotnessThresholdParser.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/HashBuilder.h" #include "llvm/Support/Host.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Regex.h" #include "llvm/Support/VersionTuple.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include #include #include #include #include #include #include #include #include #include #include using namespace clang; using namespace driver; using namespace options; using namespace llvm::opt; 
//===----------------------------------------------------------------------===// // Initialization. //===----------------------------------------------------------------------===// CompilerInvocationRefBase::CompilerInvocationRefBase() : LangOpts(new LangOptions()), TargetOpts(new TargetOptions()), DiagnosticOpts(new DiagnosticOptions()), HeaderSearchOpts(new HeaderSearchOptions()), PreprocessorOpts(new PreprocessorOptions()), AnalyzerOpts(new AnalyzerOptions()) {} CompilerInvocationRefBase::CompilerInvocationRefBase( const CompilerInvocationRefBase &X) : LangOpts(new LangOptions(*X.getLangOpts())), TargetOpts(new TargetOptions(X.getTargetOpts())), DiagnosticOpts(new DiagnosticOptions(X.getDiagnosticOpts())), HeaderSearchOpts(new HeaderSearchOptions(X.getHeaderSearchOpts())), PreprocessorOpts(new PreprocessorOptions(X.getPreprocessorOpts())), AnalyzerOpts(new AnalyzerOptions(*X.getAnalyzerOpts())) {} CompilerInvocationRefBase::CompilerInvocationRefBase( CompilerInvocationRefBase &&X) = default; CompilerInvocationRefBase & CompilerInvocationRefBase::operator=(CompilerInvocationRefBase X) { LangOpts.swap(X.LangOpts); TargetOpts.swap(X.TargetOpts); DiagnosticOpts.swap(X.DiagnosticOpts); HeaderSearchOpts.swap(X.HeaderSearchOpts); PreprocessorOpts.swap(X.PreprocessorOpts); AnalyzerOpts.swap(X.AnalyzerOpts); return *this; } CompilerInvocationRefBase & CompilerInvocationRefBase::operator=(CompilerInvocationRefBase &&X) = default; CompilerInvocationRefBase::~CompilerInvocationRefBase() = default; //===----------------------------------------------------------------------===// // Normalizers //===----------------------------------------------------------------------===// #define SIMPLE_ENUM_VALUE_TABLE #include "clang/Driver/Options.inc" #undef SIMPLE_ENUM_VALUE_TABLE static llvm::Optional normalizeSimpleFlag(OptSpecifier Opt, unsigned TableIndex, const ArgList &Args, DiagnosticsEngine &Diags) { if (Args.hasArg(Opt)) return true; return None; } static Optional normalizeSimpleNegativeFlag(OptSpecifier Opt, unsigned, const ArgList &Args, DiagnosticsEngine &) { if (Args.hasArg(Opt)) return false; return None; } /// The tblgen-erated code passes in a fifth parameter of an arbitrary type, but /// denormalizeSimpleFlags never looks at it. Avoid bloating compile-time with /// unnecessary template instantiations and just ignore it with a variadic /// argument. static void denormalizeSimpleFlag(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator, Option::OptionClass, unsigned, /*T*/...) { Args.push_back(Spelling); } template static constexpr bool is_uint64_t_convertible() { return !std::is_same::value && llvm::is_integral_or_enum::value; } template (), bool> = false> static auto makeFlagToValueNormalizer(T Value) { return [Value](OptSpecifier Opt, unsigned, const ArgList &Args, DiagnosticsEngine &) -> Optional { if (Args.hasArg(Opt)) return Value; return None; }; } template (), bool> = false> static auto makeFlagToValueNormalizer(T Value) { return makeFlagToValueNormalizer(uint64_t(Value)); } static auto makeBooleanOptionNormalizer(bool Value, bool OtherValue, OptSpecifier OtherOpt) { return [Value, OtherValue, OtherOpt](OptSpecifier Opt, unsigned, const ArgList &Args, DiagnosticsEngine &) -> Optional { if (const Arg *A = Args.getLastArg(Opt, OtherOpt)) { return A->getOption().matches(Opt) ? 
Value : OtherValue; } return None; }; } static auto makeBooleanOptionDenormalizer(bool Value) { return [Value](SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator, Option::OptionClass, unsigned, bool KeyPath) { if (KeyPath == Value) Args.push_back(Spelling); }; } static void denormalizeStringImpl(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator SA, Option::OptionClass OptClass, unsigned, const Twine &Value) { switch (OptClass) { case Option::SeparateClass: case Option::JoinedOrSeparateClass: case Option::JoinedAndSeparateClass: Args.push_back(Spelling); Args.push_back(SA(Value)); break; case Option::JoinedClass: case Option::CommaJoinedClass: Args.push_back(SA(Twine(Spelling) + Value)); break; default: llvm_unreachable("Cannot denormalize an option with option class " "incompatible with string denormalization."); } } template static void denormalizeString(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator SA, Option::OptionClass OptClass, unsigned TableIndex, T Value) { denormalizeStringImpl(Args, Spelling, SA, OptClass, TableIndex, Twine(Value)); } static Optional findValueTableByName(const SimpleEnumValueTable &Table, StringRef Name) { for (int I = 0, E = Table.Size; I != E; ++I) if (Name == Table.Table[I].Name) return Table.Table[I]; return None; } static Optional findValueTableByValue(const SimpleEnumValueTable &Table, unsigned Value) { for (int I = 0, E = Table.Size; I != E; ++I) if (Value == Table.Table[I].Value) return Table.Table[I]; return None; } static llvm::Optional normalizeSimpleEnum(OptSpecifier Opt, unsigned TableIndex, const ArgList &Args, DiagnosticsEngine &Diags) { assert(TableIndex < SimpleEnumValueTablesSize); const SimpleEnumValueTable &Table = SimpleEnumValueTables[TableIndex]; auto *Arg = Args.getLastArg(Opt); if (!Arg) return None; StringRef ArgValue = Arg->getValue(); if (auto MaybeEnumVal = findValueTableByName(Table, ArgValue)) return MaybeEnumVal->Value; Diags.Report(diag::err_drv_invalid_value) << Arg->getAsString(Args) << ArgValue; return None; } static void denormalizeSimpleEnumImpl(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator SA, Option::OptionClass OptClass, unsigned TableIndex, unsigned Value) { assert(TableIndex < SimpleEnumValueTablesSize); const SimpleEnumValueTable &Table = SimpleEnumValueTables[TableIndex]; if (auto MaybeEnumVal = findValueTableByValue(Table, Value)) { denormalizeString(Args, Spelling, SA, OptClass, TableIndex, MaybeEnumVal->Name); } else { llvm_unreachable("The simple enum value was not correctly defined in " "the tablegen option description"); } } template static void denormalizeSimpleEnum(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator SA, Option::OptionClass OptClass, unsigned TableIndex, T Value) { return denormalizeSimpleEnumImpl(Args, Spelling, SA, OptClass, TableIndex, static_cast(Value)); } static Optional normalizeString(OptSpecifier Opt, int TableIndex, const ArgList &Args, DiagnosticsEngine &Diags) { auto *Arg = Args.getLastArg(Opt); if (!Arg) return None; return std::string(Arg->getValue()); } template static Optional normalizeStringIntegral(OptSpecifier Opt, int, const ArgList &Args, DiagnosticsEngine &Diags) { auto *Arg = Args.getLastArg(Opt); if (!Arg) return None; IntTy Res; if (StringRef(Arg->getValue()).getAsInteger(0, Res)) { Diags.Report(diag::err_drv_invalid_int_value) << Arg->getAsString(Args) << Arg->getValue(); return None; } return Res; } static 
Optional> normalizeStringVector(OptSpecifier Opt, int, const ArgList &Args, DiagnosticsEngine &) { return Args.getAllArgValues(Opt); } static void denormalizeStringVector(SmallVectorImpl &Args, const char *Spelling, CompilerInvocation::StringAllocator SA, Option::OptionClass OptClass, unsigned TableIndex, const std::vector &Values) { switch (OptClass) { case Option::CommaJoinedClass: { std::string CommaJoinedValue; if (!Values.empty()) { CommaJoinedValue.append(Values.front()); for (const std::string &Value : llvm::drop_begin(Values, 1)) { CommaJoinedValue.append(","); CommaJoinedValue.append(Value); } } denormalizeString(Args, Spelling, SA, Option::OptionClass::JoinedClass, TableIndex, CommaJoinedValue); break; } case Option::JoinedClass: case Option::SeparateClass: case Option::JoinedOrSeparateClass: for (const std::string &Value : Values) denormalizeString(Args, Spelling, SA, OptClass, TableIndex, Value); break; default: llvm_unreachable("Cannot denormalize an option with option class " "incompatible with string vector denormalization."); } } static Optional normalizeTriple(OptSpecifier Opt, int TableIndex, const ArgList &Args, DiagnosticsEngine &Diags) { auto *Arg = Args.getLastArg(Opt); if (!Arg) return None; return llvm::Triple::normalize(Arg->getValue()); } template static T mergeForwardValue(T KeyPath, U Value) { return static_cast(Value); } template static T mergeMaskValue(T KeyPath, U Value) { return KeyPath | Value; } template static T extractForwardValue(T KeyPath) { return KeyPath; } template static T extractMaskValue(T KeyPath) { return ((KeyPath & Value) == Value) ? static_cast(Value) : T(); } #define PARSE_OPTION_WITH_MARSHALLING( \ ARGS, DIAGS, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) \ if ((FLAGS)&options::CC1Option) { \ KEYPATH = MERGER(KEYPATH, DEFAULT_VALUE); \ if (IMPLIED_CHECK) \ KEYPATH = MERGER(KEYPATH, IMPLIED_VALUE); \ if (SHOULD_PARSE) \ if (auto MaybeValue = NORMALIZER(OPT_##ID, TABLE_INDEX, ARGS, DIAGS)) \ KEYPATH = \ MERGER(KEYPATH, static_cast(*MaybeValue)); \ } // Capture the extracted value as a lambda argument to avoid potential issues // with lifetime extension of the reference. #define GENERATE_OPTION_WITH_MARSHALLING( \ ARGS, STRING_ALLOCATOR, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, \ TABLE_INDEX) \ if ((FLAGS)&options::CC1Option) { \ [&](const auto &Extracted) { \ if (ALWAYS_EMIT || \ (Extracted != \ static_cast((IMPLIED_CHECK) ? 
(IMPLIED_VALUE) \ : (DEFAULT_VALUE)))) \ DENORMALIZER(ARGS, SPELLING, STRING_ALLOCATOR, Option::KIND##Class, \ TABLE_INDEX, Extracted); \ }(EXTRACTOR(KEYPATH)); \ } static StringRef GetInputKindName(InputKind IK); static bool FixupInvocation(CompilerInvocation &Invocation, DiagnosticsEngine &Diags, const ArgList &Args, InputKind IK) { unsigned NumErrorsBefore = Diags.getNumErrors(); LangOptions &LangOpts = *Invocation.getLangOpts(); CodeGenOptions &CodeGenOpts = Invocation.getCodeGenOpts(); TargetOptions &TargetOpts = Invocation.getTargetOpts(); FrontendOptions &FrontendOpts = Invocation.getFrontendOpts(); CodeGenOpts.XRayInstrumentFunctions = LangOpts.XRayInstrument; CodeGenOpts.XRayAlwaysEmitCustomEvents = LangOpts.XRayAlwaysEmitCustomEvents; CodeGenOpts.XRayAlwaysEmitTypedEvents = LangOpts.XRayAlwaysEmitTypedEvents; CodeGenOpts.DisableFree = FrontendOpts.DisableFree; FrontendOpts.GenerateGlobalModuleIndex = FrontendOpts.UseGlobalModuleIndex; if (FrontendOpts.ShowStats) CodeGenOpts.ClearASTBeforeBackend = false; LangOpts.SanitizeCoverage = CodeGenOpts.hasSanitizeCoverage(); LangOpts.ForceEmitVTables = CodeGenOpts.ForceEmitVTables; LangOpts.SpeculativeLoadHardening = CodeGenOpts.SpeculativeLoadHardening; LangOpts.CurrentModule = LangOpts.ModuleName; llvm::Triple T(TargetOpts.Triple); llvm::Triple::ArchType Arch = T.getArch(); CodeGenOpts.CodeModel = TargetOpts.CodeModel; if (LangOpts.getExceptionHandling() != LangOptions::ExceptionHandlingKind::None && T.isWindowsMSVCEnvironment()) Diags.Report(diag::err_fe_invalid_exception_model) << static_cast(LangOpts.getExceptionHandling()) << T.str(); if (LangOpts.AppleKext && !LangOpts.CPlusPlus) Diags.Report(diag::warn_c_kext); if (Args.hasArg(OPT_fconcepts_ts)) Diags.Report(diag::warn_fe_concepts_ts_flag); if (LangOpts.NewAlignOverride && !llvm::isPowerOf2_32(LangOpts.NewAlignOverride)) { Arg *A = Args.getLastArg(OPT_fnew_alignment_EQ); Diags.Report(diag::err_fe_invalid_alignment) << A->getAsString(Args) << A->getValue(); LangOpts.NewAlignOverride = 0; } // Prevent the user from specifying both -fsycl-is-device and -fsycl-is-host. if (LangOpts.SYCLIsDevice && LangOpts.SYCLIsHost) Diags.Report(diag::err_drv_argument_not_allowed_with) << "-fsycl-is-device" << "-fsycl-is-host"; if (Args.hasArg(OPT_fgnu89_inline) && LangOpts.CPlusPlus) Diags.Report(diag::err_drv_argument_not_allowed_with) << "-fgnu89-inline" << GetInputKindName(IK); if (Args.hasArg(OPT_fgpu_allow_device_init) && !LangOpts.HIP) Diags.Report(diag::warn_ignored_hip_only_option) << Args.getLastArg(OPT_fgpu_allow_device_init)->getAsString(Args); if (Args.hasArg(OPT_gpu_max_threads_per_block_EQ) && !LangOpts.HIP) Diags.Report(diag::warn_ignored_hip_only_option) << Args.getLastArg(OPT_gpu_max_threads_per_block_EQ)->getAsString(Args); // -cl-strict-aliasing needs to emit diagnostic in the case where CL > 1.0. // This option should be deprecated for CL > 1.0 because // this option was added for compatibility with OpenCL 1.0. 
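  // e.g. (illustrative): "clang -cc1 -cl-std=CL2.0 -cl-strict-aliasing t.cl"
  // takes the branch below and emits warn_option_invalid_ocl_version.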
if (Args.getLastArg(OPT_cl_strict_aliasing) && (LangOpts.getOpenCLCompatibleVersion() > 100)) Diags.Report(diag::warn_option_invalid_ocl_version) << LangOpts.getOpenCLVersionString() << Args.getLastArg(OPT_cl_strict_aliasing)->getAsString(Args); if (Arg *A = Args.getLastArg(OPT_fdefault_calling_conv_EQ)) { auto DefaultCC = LangOpts.getDefaultCallingConv(); bool emitError = (DefaultCC == LangOptions::DCC_FastCall || DefaultCC == LangOptions::DCC_StdCall) && Arch != llvm::Triple::x86; emitError |= (DefaultCC == LangOptions::DCC_VectorCall || DefaultCC == LangOptions::DCC_RegCall) && !T.isX86(); if (emitError) Diags.Report(diag::err_drv_argument_not_allowed_with) << A->getSpelling() << T.getTriple(); } if (!CodeGenOpts.ProfileRemappingFile.empty() && CodeGenOpts.LegacyPassManager) Diags.Report(diag::err_drv_argument_only_allowed_with) << Args.getLastArg(OPT_fprofile_remapping_file_EQ)->getAsString(Args) << "-fno-legacy-pass-manager"; return Diags.getNumErrors() == NumErrorsBefore; } //===----------------------------------------------------------------------===// // Deserialization (from args) //===----------------------------------------------------------------------===// static unsigned getOptimizationLevel(ArgList &Args, InputKind IK, DiagnosticsEngine &Diags) { unsigned DefaultOpt = llvm::CodeGenOpt::None; if ((IK.getLanguage() == Language::OpenCL || IK.getLanguage() == Language::OpenCLCXX) && !Args.hasArg(OPT_cl_opt_disable)) DefaultOpt = llvm::CodeGenOpt::Default; if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O0)) return llvm::CodeGenOpt::None; if (A->getOption().matches(options::OPT_Ofast)) return llvm::CodeGenOpt::Aggressive; assert(A->getOption().matches(options::OPT_O)); StringRef S(A->getValue()); if (S == "s" || S == "z") return llvm::CodeGenOpt::Default; if (S == "g") return llvm::CodeGenOpt::Less; return getLastArgIntValue(Args, OPT_O, DefaultOpt, Diags); } return DefaultOpt; } static unsigned getOptimizationLevelSize(ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { if (A->getOption().matches(options::OPT_O)) { switch (A->getValue()[0]) { default: return 0; case 's': return 1; case 'z': return 2; } } } return 0; } static void GenerateArg(SmallVectorImpl &Args, llvm::opt::OptSpecifier OptSpecifier, CompilerInvocation::StringAllocator SA) { Option Opt = getDriverOptTable().getOption(OptSpecifier); denormalizeSimpleFlag(Args, SA(Opt.getPrefix() + Opt.getName()), SA, Option::OptionClass::FlagClass, 0); } static void GenerateArg(SmallVectorImpl &Args, llvm::opt::OptSpecifier OptSpecifier, const Twine &Value, CompilerInvocation::StringAllocator SA) { Option Opt = getDriverOptTable().getOption(OptSpecifier); denormalizeString(Args, SA(Opt.getPrefix() + Opt.getName()), SA, Opt.getKind(), 0, Value); } // Parse command line arguments into CompilerInvocation. using ParseFn = llvm::function_ref, DiagnosticsEngine &, const char *)>; // Generate command line arguments from CompilerInvocation. using GenerateFn = llvm::function_ref &, CompilerInvocation::StringAllocator)>; // May perform round-trip of command line arguments. By default, the round-trip // is enabled in assert builds. This can be overwritten at run-time via the // "-round-trip-args" and "-no-round-trip-args" command line flags. // During round-trip, the command line arguments are parsed into a dummy // instance of CompilerInvocation which is used to generate the command line // arguments again. 
// The real CompilerInvocation instance is then created by parsing the
// generated arguments, not the original ones.
static bool RoundTrip(ParseFn Parse, GenerateFn Generate,
                      CompilerInvocation &RealInvocation,
                      CompilerInvocation &DummyInvocation,
                      ArrayRef<const char *> CommandLineArgs,
                      DiagnosticsEngine &Diags, const char *Argv0) {
#ifndef NDEBUG
  bool DoRoundTripDefault = true;
#else
  bool DoRoundTripDefault = false;
#endif

  bool DoRoundTrip = DoRoundTripDefault;
  for (const auto *Arg : CommandLineArgs) {
    if (Arg == StringRef("-round-trip-args"))
      DoRoundTrip = true;
    if (Arg == StringRef("-no-round-trip-args"))
      DoRoundTrip = false;
  }

  // If round-trip was not requested, simply run the parser with the real
  // invocation diagnostics.
  if (!DoRoundTrip)
    return Parse(RealInvocation, CommandLineArgs, Diags, Argv0);

  // Serializes quoted (and potentially escaped) arguments.
  auto SerializeArgs = [](ArrayRef<const char *> Args) {
    std::string Buffer;
    llvm::raw_string_ostream OS(Buffer);
    for (const char *Arg : Args) {
      llvm::sys::printArg(OS, Arg, /*Quote=*/true);
      OS << ' ';
    }
    OS.flush();
    return Buffer;
  };

  // Set up a dummy DiagnosticsEngine.
  DiagnosticsEngine DummyDiags(new DiagnosticIDs(), new DiagnosticOptions());
  DummyDiags.setClient(new TextDiagnosticBuffer());

  // Run the first parse on the original arguments with the dummy invocation
  // and diagnostics.
  if (!Parse(DummyInvocation, CommandLineArgs, DummyDiags, Argv0) ||
      DummyDiags.getNumWarnings() != 0) {
    // If the first parse did not succeed, it must be a user mistake (invalid
    // command line arguments). We won't be able to generate arguments that
    // would reproduce the same result. Let's fail again with the real
    // invocation and diagnostics, so all side-effects of parsing are visible.
    unsigned NumWarningsBefore = Diags.getNumWarnings();
    auto Success = Parse(RealInvocation, CommandLineArgs, Diags, Argv0);
    if (!Success || Diags.getNumWarnings() != NumWarningsBefore)
      return Success;

    // Parse with original options and diagnostics succeeded even though it
    // shouldn't have. Something is off.
    Diags.Report(diag::err_cc1_round_trip_fail_then_ok);
    Diags.Report(diag::note_cc1_round_trip_original)
        << SerializeArgs(CommandLineArgs);
    return false;
  }

  // Set up the string allocator.
  llvm::BumpPtrAllocator Alloc;
  llvm::StringSaver StringPool(Alloc);
  auto SA = [&StringPool](const Twine &Arg) {
    return StringPool.save(Arg).data();
  };

  // Generate arguments from the dummy invocation. If Generate is the inverse
  // of Parse, the newly generated arguments must have the same semantics as
  // the original.
  SmallVector<const char *> GeneratedArgs1;
  Generate(DummyInvocation, GeneratedArgs1, SA);

  // Run the second parse, now on the generated arguments, and with the real
  // invocation and diagnostics. The result is what we will end up using for
  // the rest of the compilation, so if Generate is not the inverse of Parse,
  // something down the line will break.
  bool Success2 = Parse(RealInvocation, GeneratedArgs1, Diags, Argv0);

  // The first parse on the original arguments succeeded, but the second parse
  // of the generated arguments failed. Something must be wrong with the
  // generator.
  if (!Success2) {
    Diags.Report(diag::err_cc1_round_trip_ok_then_fail);
    Diags.Report(diag::note_cc1_round_trip_generated)
        << 1 << SerializeArgs(GeneratedArgs1);
    return false;
  }

  // Generate arguments again, this time from the options we will end up using
  // for the rest of the compilation.
  SmallVector<const char *> GeneratedArgs2;
  Generate(RealInvocation, GeneratedArgs2, SA);

  // Compares two lists of generated arguments.
  auto Equal = [](const ArrayRef<const char *> A,
                  const ArrayRef<const char *> B) {
    return std::equal(A.begin(), A.end(), B.begin(), B.end(),
                      [](const char *AElem, const char *BElem) {
                        return StringRef(AElem) == StringRef(BElem);
                      });
  };

  // If we generated different arguments from what we assume are two
  // semantically equivalent CompilerInvocations, the Generate function may
  // be non-deterministic.
  if (!Equal(GeneratedArgs1, GeneratedArgs2)) {
    Diags.Report(diag::err_cc1_round_trip_mismatch);
    Diags.Report(diag::note_cc1_round_trip_generated)
        << 1 << SerializeArgs(GeneratedArgs1);
    Diags.Report(diag::note_cc1_round_trip_generated)
        << 2 << SerializeArgs(GeneratedArgs2);
    return false;
  }

  Diags.Report(diag::remark_cc1_round_trip_generated)
      << 1 << SerializeArgs(GeneratedArgs1);
  Diags.Report(diag::remark_cc1_round_trip_generated)
      << 2 << SerializeArgs(GeneratedArgs2);

  return Success2;
}

static void addDiagnosticArgs(ArgList &Args, OptSpecifier Group,
                              OptSpecifier GroupWithValue,
                              std::vector<std::string> &Diagnostics) {
  for (auto *A : Args.filtered(Group)) {
    if (A->getOption().getKind() == Option::FlagClass) {
      // The argument is a pure flag (such as OPT_Wall or OPT_Wdeprecated). Add
      // its name (minus the "W" or "R" at the beginning) to the diagnostics.
      Diagnostics.push_back(
          std::string(A->getOption().getName().drop_front(1)));
    } else if (A->getOption().matches(GroupWithValue)) {
      // This is -Wfoo= or -Rfoo=, where foo is the name of the diagnostic
      // group. Add only the group name to the diagnostics.
      Diagnostics.push_back(
          std::string(A->getOption().getName().drop_front(1).rtrim("=-")));
    } else {
      // Otherwise, add its value (for OPT_W_Joined and similar).
      Diagnostics.push_back(A->getValue());
    }
  }
}

// Parse the Static Analyzer configuration. If \p Diags is set to nullptr,
// it won't verify the input.
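// (For example, GenerateAnalyzerArgs below calls parseAnalyzerConfigs with a
// nullptr DiagnosticsEngine, since there is no user input to validate when
// generating arguments.)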
static void parseAnalyzerConfigs(AnalyzerOptions &AnOpts, DiagnosticsEngine *Diags); static void getAllNoBuiltinFuncValues(ArgList &Args, std::vector &Funcs) { std::vector Values = Args.getAllArgValues(OPT_fno_builtin_); auto BuiltinEnd = llvm::partition(Values, Builtin::Context::isBuiltinFunc); Funcs.insert(Funcs.end(), Values.begin(), BuiltinEnd); } static void GenerateAnalyzerArgs(AnalyzerOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const AnalyzerOptions *AnalyzerOpts = &Opts; #define ANALYZER_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef ANALYZER_OPTION_WITH_MARSHALLING if (Opts.AnalysisStoreOpt != RegionStoreModel) { switch (Opts.AnalysisStoreOpt) { #define ANALYSIS_STORE(NAME, CMDFLAG, DESC, CREATFN) \ case NAME##Model: \ GenerateArg(Args, OPT_analyzer_store, CMDFLAG, SA); \ break; #include "clang/StaticAnalyzer/Core/Analyses.def" default: llvm_unreachable("Tried to generate unknown analysis store."); } } if (Opts.AnalysisConstraintsOpt != RangeConstraintsModel) { switch (Opts.AnalysisConstraintsOpt) { #define ANALYSIS_CONSTRAINTS(NAME, CMDFLAG, DESC, CREATFN) \ case NAME##Model: \ GenerateArg(Args, OPT_analyzer_constraints, CMDFLAG, SA); \ break; #include "clang/StaticAnalyzer/Core/Analyses.def" default: llvm_unreachable("Tried to generate unknown analysis constraint."); } } if (Opts.AnalysisDiagOpt != PD_HTML) { switch (Opts.AnalysisDiagOpt) { #define ANALYSIS_DIAGNOSTICS(NAME, CMDFLAG, DESC, CREATFN) \ case PD_##NAME: \ GenerateArg(Args, OPT_analyzer_output, CMDFLAG, SA); \ break; #include "clang/StaticAnalyzer/Core/Analyses.def" default: llvm_unreachable("Tried to generate unknown analysis diagnostic client."); } } if (Opts.AnalysisPurgeOpt != PurgeStmt) { switch (Opts.AnalysisPurgeOpt) { #define ANALYSIS_PURGE(NAME, CMDFLAG, DESC) \ case NAME: \ GenerateArg(Args, OPT_analyzer_purge, CMDFLAG, SA); \ break; #include "clang/StaticAnalyzer/Core/Analyses.def" default: llvm_unreachable("Tried to generate unknown analysis purge mode."); } } if (Opts.InliningMode != NoRedundancy) { switch (Opts.InliningMode) { #define ANALYSIS_INLINING_MODE(NAME, CMDFLAG, DESC) \ case NAME: \ GenerateArg(Args, OPT_analyzer_inlining_mode, CMDFLAG, SA); \ break; #include "clang/StaticAnalyzer/Core/Analyses.def" default: llvm_unreachable("Tried to generate unknown analysis inlining mode."); } } for (const auto &CP : Opts.CheckersAndPackages) { OptSpecifier Opt = CP.second ? OPT_analyzer_checker : OPT_analyzer_disable_checker; GenerateArg(Args, Opt, CP.first, SA); } AnalyzerOptions ConfigOpts; parseAnalyzerConfigs(ConfigOpts, nullptr); for (const auto &C : Opts.Config) { // Don't generate anything that came from parseAnalyzerConfigs. It would be // redundant and may not be valid on the command line. auto Entry = ConfigOpts.Config.find(C.getKey()); if (Entry != ConfigOpts.Config.end() && Entry->getValue() == C.getValue()) continue; GenerateArg(Args, OPT_analyzer_config, C.getKey() + "=" + C.getValue(), SA); } // Nothing to generate for FullCompilerInvocation. 
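  // (Illustration: a dummy invocation parsed from '-analyzer-checker=core
  // -analyzer-disable-checker=unix' is regenerated verbatim by the
  // CheckersAndPackages loop above; this kind of round-tripping is what keeps
  // the Generate* functions the inverse of their Parse* counterparts.)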
} static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); AnalyzerOptions *AnalyzerOpts = &Opts; #define ANALYZER_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef ANALYZER_OPTION_WITH_MARSHALLING if (Arg *A = Args.getLastArg(OPT_analyzer_store)) { StringRef Name = A->getValue(); AnalysisStores Value = llvm::StringSwitch(Name) #define ANALYSIS_STORE(NAME, CMDFLAG, DESC, CREATFN) \ .Case(CMDFLAG, NAME##Model) #include "clang/StaticAnalyzer/Core/Analyses.def" .Default(NumStores); if (Value == NumStores) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else { Opts.AnalysisStoreOpt = Value; } } if (Arg *A = Args.getLastArg(OPT_analyzer_constraints)) { StringRef Name = A->getValue(); AnalysisConstraints Value = llvm::StringSwitch(Name) #define ANALYSIS_CONSTRAINTS(NAME, CMDFLAG, DESC, CREATFN) \ .Case(CMDFLAG, NAME##Model) #include "clang/StaticAnalyzer/Core/Analyses.def" .Default(NumConstraints); if (Value == NumConstraints) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else { Opts.AnalysisConstraintsOpt = Value; } } if (Arg *A = Args.getLastArg(OPT_analyzer_output)) { StringRef Name = A->getValue(); AnalysisDiagClients Value = llvm::StringSwitch(Name) #define ANALYSIS_DIAGNOSTICS(NAME, CMDFLAG, DESC, CREATFN) \ .Case(CMDFLAG, PD_##NAME) #include "clang/StaticAnalyzer/Core/Analyses.def" .Default(NUM_ANALYSIS_DIAG_CLIENTS); if (Value == NUM_ANALYSIS_DIAG_CLIENTS) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else { Opts.AnalysisDiagOpt = Value; } } if (Arg *A = Args.getLastArg(OPT_analyzer_purge)) { StringRef Name = A->getValue(); AnalysisPurgeMode Value = llvm::StringSwitch(Name) #define ANALYSIS_PURGE(NAME, CMDFLAG, DESC) \ .Case(CMDFLAG, NAME) #include "clang/StaticAnalyzer/Core/Analyses.def" .Default(NumPurgeModes); if (Value == NumPurgeModes) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else { Opts.AnalysisPurgeOpt = Value; } } if (Arg *A = Args.getLastArg(OPT_analyzer_inlining_mode)) { StringRef Name = A->getValue(); AnalysisInliningMode Value = llvm::StringSwitch(Name) #define ANALYSIS_INLINING_MODE(NAME, CMDFLAG, DESC) \ .Case(CMDFLAG, NAME) #include "clang/StaticAnalyzer/Core/Analyses.def" .Default(NumInliningModes); if (Value == NumInliningModes) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else { Opts.InliningMode = Value; } } Opts.CheckersAndPackages.clear(); for (const Arg *A : Args.filtered(OPT_analyzer_checker, OPT_analyzer_disable_checker)) { A->claim(); bool IsEnabled = A->getOption().getID() == OPT_analyzer_checker; // We can have a list of comma separated checker names, e.g: // '-analyzer-checker=cocoa,unix' StringRef CheckerAndPackageList = A->getValue(); SmallVector CheckersAndPackages; CheckerAndPackageList.split(CheckersAndPackages, ","); for (const StringRef &CheckerOrPackage : CheckersAndPackages) Opts.CheckersAndPackages.emplace_back(std::string(CheckerOrPackage), IsEnabled); } // Go 
// through the analyzer configuration options.
  for (const auto *A : Args.filtered(OPT_analyzer_config)) {
    // We can have a list of comma-separated config names, e.g:
    // '-analyzer-config key1=val1,key2=val2'
    StringRef configList = A->getValue();
    SmallVector<StringRef, 4> configVals;
    configList.split(configVals, ",");
    for (const auto &configVal : configVals) {
      StringRef key, val;
      std::tie(key, val) = configVal.split("=");
      if (val.empty()) {
        Diags.Report(SourceLocation(),
                     diag::err_analyzer_config_no_value) << configVal;
        break;
      }
      if (val.contains('=')) {
        Diags.Report(SourceLocation(),
                     diag::err_analyzer_config_multiple_values)
            << configVal;
        break;
      }

      // TODO: Check checker options too, possibly in CheckerRegistry.
      // Leave unknown non-checker configs unclaimed.
      if (!key.contains(":") && Opts.isUnknownAnalyzerConfig(key)) {
        if (Opts.ShouldEmitErrorsOnInvalidConfigValue)
          Diags.Report(diag::err_analyzer_config_unknown) << key;
        continue;
      }

      A->claim();
      Opts.Config[key] = std::string(val);
    }
  }

  if (Opts.ShouldEmitErrorsOnInvalidConfigValue)
    parseAnalyzerConfigs(Opts, &Diags);
  else
    parseAnalyzerConfigs(Opts, nullptr);

  llvm::raw_string_ostream os(Opts.FullCompilerInvocation);
  for (unsigned i = 0; i < Args.getNumInputArgStrings(); ++i) {
    if (i != 0)
      os << " ";
    os << Args.getArgString(i);
  }
  os.flush();

  return Diags.getNumErrors() == NumErrorsBefore;
}

static StringRef getStringOption(AnalyzerOptions::ConfigTable &Config,
                                 StringRef OptionName, StringRef DefaultVal) {
  return Config.insert({OptionName, std::string(DefaultVal)}).first->second;
}

static void initOption(AnalyzerOptions::ConfigTable &Config,
                       DiagnosticsEngine *Diags,
                       StringRef &OptionField, StringRef Name,
                       StringRef DefaultVal) {
  // String options may be known to be invalid (e.g. if the expected string is
  // a file name, but the file does not exist); those will have to be checked
  // in parseConfigs.
  OptionField = getStringOption(Config, Name, DefaultVal);
}

static void initOption(AnalyzerOptions::ConfigTable &Config,
                       DiagnosticsEngine *Diags,
                       bool &OptionField, StringRef Name, bool DefaultVal) {
  auto PossiblyInvalidVal =
      llvm::StringSwitch<Optional<bool>>(
          getStringOption(Config, Name, (DefaultVal ? "true" : "false")))
          .Case("true", true)
          .Case("false", false)
          .Default(None);

  if (!PossiblyInvalidVal) {
    if (Diags)
      Diags->Report(diag::err_analyzer_config_invalid_input)
          << Name << "a boolean";
    else
      OptionField = DefaultVal;
  } else
    OptionField = PossiblyInvalidVal.getValue();
}

static void initOption(AnalyzerOptions::ConfigTable &Config,
                       DiagnosticsEngine *Diags,
                       unsigned &OptionField, StringRef Name,
                       unsigned DefaultVal) {
  OptionField = DefaultVal;
  bool HasFailed = getStringOption(Config, Name, std::to_string(DefaultVal))
                       .getAsInteger(0, OptionField);
  if (Diags && HasFailed)
    Diags->Report(diag::err_analyzer_config_invalid_input)
        << Name << "an unsigned";
}

static void parseAnalyzerConfigs(AnalyzerOptions &AnOpts,
                                 DiagnosticsEngine *Diags) {
  // TODO: There's no need to store the entire config table; it would be
  // enough to store the checker options.
#define ANALYZER_OPTION(TYPE, NAME, CMDFLAG, DESC, DEFAULT_VAL) \ initOption(AnOpts.Config, Diags, AnOpts.NAME, CMDFLAG, DEFAULT_VAL); #define ANALYZER_OPTION_DEPENDS_ON_USER_MODE(TYPE, NAME, CMDFLAG, DESC, \ SHALLOW_VAL, DEEP_VAL) \ switch (AnOpts.getUserMode()) { \ case UMK_Shallow: \ initOption(AnOpts.Config, Diags, AnOpts.NAME, CMDFLAG, SHALLOW_VAL); \ break; \ case UMK_Deep: \ initOption(AnOpts.Config, Diags, AnOpts.NAME, CMDFLAG, DEEP_VAL); \ break; \ } \ #include "clang/StaticAnalyzer/Core/AnalyzerOptions.def" #undef ANALYZER_OPTION #undef ANALYZER_OPTION_DEPENDS_ON_USER_MODE // At this point, AnalyzerOptions is configured. Let's validate some options. // FIXME: Here we try to validate the silenced checkers or packages are valid. // The current approach only validates the registered checkers which does not // contain the runtime enabled checkers and optimally we would validate both. if (!AnOpts.RawSilencedCheckersAndPackages.empty()) { std::vector Checkers = AnOpts.getRegisteredCheckers(/*IncludeExperimental=*/true); std::vector Packages = AnOpts.getRegisteredPackages(/*IncludeExperimental=*/true); SmallVector CheckersAndPackages; AnOpts.RawSilencedCheckersAndPackages.split(CheckersAndPackages, ";"); for (const StringRef &CheckerOrPackage : CheckersAndPackages) { if (Diags) { bool IsChecker = CheckerOrPackage.contains('.'); bool IsValidName = IsChecker ? llvm::is_contained(Checkers, CheckerOrPackage) : llvm::is_contained(Packages, CheckerOrPackage); if (!IsValidName) Diags->Report(diag::err_unknown_analyzer_checker_or_package) << CheckerOrPackage; } AnOpts.SilencedCheckersAndPackages.emplace_back(CheckerOrPackage); } } if (!Diags) return; if (AnOpts.ShouldTrackConditionsDebug && !AnOpts.ShouldTrackConditions) Diags->Report(diag::err_analyzer_config_invalid_input) << "track-conditions-debug" << "'track-conditions' to also be enabled"; if (!AnOpts.CTUDir.empty() && !llvm::sys::fs::is_directory(AnOpts.CTUDir)) Diags->Report(diag::err_analyzer_config_invalid_input) << "ctu-dir" << "a filename"; if (!AnOpts.ModelPath.empty() && !llvm::sys::fs::is_directory(AnOpts.ModelPath)) Diags->Report(diag::err_analyzer_config_invalid_input) << "model-path" << "a filename"; } /// Generate a remark argument. This is an inverse of `ParseOptimizationRemark`. static void GenerateOptimizationRemark(SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA, OptSpecifier OptEQ, StringRef Name, const CodeGenOptions::OptRemark &Remark) { if (Remark.hasValidPattern()) { GenerateArg(Args, OptEQ, Remark.Pattern, SA); } else if (Remark.Kind == CodeGenOptions::RK_Enabled) { GenerateArg(Args, OPT_R_Joined, Name, SA); } else if (Remark.Kind == CodeGenOptions::RK_Disabled) { GenerateArg(Args, OPT_R_Joined, StringRef("no-") + Name, SA); } } /// Parse a remark command line argument. It may be missing, disabled/enabled by /// '-R[no-]group' or specified with a regular expression by '-Rgroup=regexp'. /// On top of that, it can be disabled/enabled globally by '-R[no-]everything'. 
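/// For example, '-Rpass=inline' requests remarks only from passes whose name
/// matches the regular expression "inline", while '-Rno-pass' and
/// '-Rno-everything' disable the "pass" group and all remark groups
/// respectively. (The examples are illustrative; the accepted spellings come
/// from the logic below.)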
static CodeGenOptions::OptRemark ParseOptimizationRemark(DiagnosticsEngine &Diags, ArgList &Args, OptSpecifier OptEQ, StringRef Name) { CodeGenOptions::OptRemark Result; auto InitializeResultPattern = [&Diags, &Args, &Result](const Arg *A, StringRef Pattern) { Result.Pattern = Pattern.str(); std::string RegexError; Result.Regex = std::make_shared(Result.Pattern); if (!Result.Regex->isValid(RegexError)) { Diags.Report(diag::err_drv_optimization_remark_pattern) << RegexError << A->getAsString(Args); return false; } return true; }; for (Arg *A : Args) { if (A->getOption().matches(OPT_R_Joined)) { StringRef Value = A->getValue(); if (Value == Name) Result.Kind = CodeGenOptions::RK_Enabled; else if (Value == "everything") Result.Kind = CodeGenOptions::RK_EnabledEverything; else if (Value.split('-') == std::make_pair(StringRef("no"), Name)) Result.Kind = CodeGenOptions::RK_Disabled; else if (Value == "no-everything") Result.Kind = CodeGenOptions::RK_DisabledEverything; else continue; if (Result.Kind == CodeGenOptions::RK_Disabled || Result.Kind == CodeGenOptions::RK_DisabledEverything) { Result.Pattern = ""; Result.Regex = nullptr; } else { InitializeResultPattern(A, ".*"); } } else if (A->getOption().matches(OptEQ)) { Result.Kind = CodeGenOptions::RK_WithPattern; if (!InitializeResultPattern(A, A->getValue())) return CodeGenOptions::OptRemark(); } } return Result; } static bool parseDiagnosticLevelMask(StringRef FlagName, const std::vector &Levels, DiagnosticsEngine &Diags, DiagnosticLevelMask &M) { bool Success = true; for (const auto &Level : Levels) { DiagnosticLevelMask const PM = llvm::StringSwitch(Level) .Case("note", DiagnosticLevelMask::Note) .Case("remark", DiagnosticLevelMask::Remark) .Case("warning", DiagnosticLevelMask::Warning) .Case("error", DiagnosticLevelMask::Error) .Default(DiagnosticLevelMask::None); if (PM == DiagnosticLevelMask::None) { Success = false; Diags.Report(diag::err_drv_invalid_value) << FlagName << Level; } M = M | PM; } return Success; } static void parseSanitizerKinds(StringRef FlagName, const std::vector &Sanitizers, DiagnosticsEngine &Diags, SanitizerSet &S) { for (const auto &Sanitizer : Sanitizers) { SanitizerMask K = parseSanitizerValue(Sanitizer, /*AllowGroups=*/false); if (K == SanitizerMask()) Diags.Report(diag::err_drv_invalid_value) << FlagName << Sanitizer; else S.set(K, true); } } static SmallVector serializeSanitizerKinds(SanitizerSet S) { SmallVector Values; serializeSanitizerSet(S, Values); return Values; } static void parseXRayInstrumentationBundle(StringRef FlagName, StringRef Bundle, ArgList &Args, DiagnosticsEngine &D, XRayInstrSet &S) { llvm::SmallVector BundleParts; llvm::SplitString(Bundle, BundleParts, ","); for (const auto &B : BundleParts) { auto Mask = parseXRayInstrValue(B); if (Mask == XRayInstrKind::None) if (B != "none") D.Report(diag::err_drv_invalid_value) << FlagName << Bundle; else S.Mask = Mask; else if (Mask == XRayInstrKind::All) S.Mask = Mask; else S.set(Mask, true); } } static std::string serializeXRayInstrumentationBundle(const XRayInstrSet &S) { llvm::SmallVector BundleParts; serializeXRayInstrValue(S, BundleParts); std::string Buffer; llvm::raw_string_ostream OS(Buffer); llvm::interleave(BundleParts, OS, [&OS](StringRef Part) { OS << Part; }, ","); return Buffer; } // Set the profile kind using fprofile-instrument-use-path. 
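// (A sketch of the intended behavior, based on the code below: given
// '-fprofile-instrument-use-path=default.profdata', the indexed profile
// reader decides between ProfileClangInstr, ProfileIRInstr and
// ProfileCSIRInstr depending on what kind of profile the file contains.)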
static void setPGOUseInstrumentor(CodeGenOptions &Opts, const Twine &ProfileName) { auto ReaderOrErr = llvm::IndexedInstrProfReader::create(ProfileName); // In error, return silently and let Clang PGOUse report the error message. if (auto E = ReaderOrErr.takeError()) { llvm::consumeError(std::move(E)); Opts.setProfileUse(CodeGenOptions::ProfileClangInstr); return; } std::unique_ptr PGOReader = std::move(ReaderOrErr.get()); if (PGOReader->isIRLevelProfile()) { if (PGOReader->hasCSIRLevelProfile()) Opts.setProfileUse(CodeGenOptions::ProfileCSIRInstr); else Opts.setProfileUse(CodeGenOptions::ProfileIRInstr); } else Opts.setProfileUse(CodeGenOptions::ProfileClangInstr); } void CompilerInvocation::GenerateCodeGenArgs( const CodeGenOptions &Opts, SmallVectorImpl &Args, StringAllocator SA, const llvm::Triple &T, const std::string &OutputFile, const LangOptions *LangOpts) { const CodeGenOptions &CodeGenOpts = Opts; if (Opts.OptimizationLevel == 0) GenerateArg(Args, OPT_O0, SA); else GenerateArg(Args, OPT_O, Twine(Opts.OptimizationLevel), SA); #define CODEGEN_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef CODEGEN_OPTION_WITH_MARSHALLING if (Opts.OptimizationLevel > 0) { if (Opts.Inlining == CodeGenOptions::NormalInlining) GenerateArg(Args, OPT_finline_functions, SA); else if (Opts.Inlining == CodeGenOptions::OnlyHintInlining) GenerateArg(Args, OPT_finline_hint_functions, SA); else if (Opts.Inlining == CodeGenOptions::OnlyAlwaysInlining) GenerateArg(Args, OPT_fno_inline, SA); } if (Opts.DirectAccessExternalData && LangOpts->PICLevel != 0) GenerateArg(Args, OPT_fdirect_access_external_data, SA); else if (!Opts.DirectAccessExternalData && LangOpts->PICLevel == 0) GenerateArg(Args, OPT_fno_direct_access_external_data, SA); Optional DebugInfoVal; switch (Opts.DebugInfo) { case codegenoptions::DebugLineTablesOnly: DebugInfoVal = "line-tables-only"; break; case codegenoptions::DebugDirectivesOnly: DebugInfoVal = "line-directives-only"; break; case codegenoptions::DebugInfoConstructor: DebugInfoVal = "constructor"; break; case codegenoptions::LimitedDebugInfo: DebugInfoVal = "limited"; break; case codegenoptions::FullDebugInfo: DebugInfoVal = "standalone"; break; case codegenoptions::UnusedTypeInfo: DebugInfoVal = "unused-types"; break; case codegenoptions::NoDebugInfo: // default value DebugInfoVal = None; break; case codegenoptions::LocTrackingOnly: // implied value DebugInfoVal = None; break; } if (DebugInfoVal) GenerateArg(Args, OPT_debug_info_kind_EQ, *DebugInfoVal, SA); for (const auto &Prefix : Opts.DebugPrefixMap) GenerateArg(Args, OPT_fdebug_prefix_map_EQ, Prefix.first + "=" + Prefix.second, SA); for (const auto &Prefix : Opts.CoveragePrefixMap) GenerateArg(Args, OPT_fcoverage_prefix_map_EQ, Prefix.first + "=" + Prefix.second, SA); if (Opts.NewStructPathTBAA) GenerateArg(Args, OPT_new_struct_path_tbaa, SA); if (Opts.OptimizeSize == 1) GenerateArg(Args, OPT_O, "s", SA); else if (Opts.OptimizeSize == 2) GenerateArg(Args, OPT_O, "z", SA); // SimplifyLibCalls is set only in the absence of -fno-builtin and // -ffreestanding. 
We'll consider that when generating them. // NoBuiltinFuncs are generated by LangOptions. if (Opts.UnrollLoops && Opts.OptimizationLevel <= 1) GenerateArg(Args, OPT_funroll_loops, SA); else if (!Opts.UnrollLoops && Opts.OptimizationLevel > 1) GenerateArg(Args, OPT_fno_unroll_loops, SA); if (!Opts.BinutilsVersion.empty()) GenerateArg(Args, OPT_fbinutils_version_EQ, Opts.BinutilsVersion, SA); if (Opts.DebugNameTable == static_cast(llvm::DICompileUnit::DebugNameTableKind::GNU)) GenerateArg(Args, OPT_ggnu_pubnames, SA); else if (Opts.DebugNameTable == static_cast( llvm::DICompileUnit::DebugNameTableKind::Default)) GenerateArg(Args, OPT_gpubnames, SA); auto TNK = Opts.getDebugSimpleTemplateNames(); if (TNK != codegenoptions::DebugTemplateNamesKind::Full) { if (TNK == codegenoptions::DebugTemplateNamesKind::Simple) GenerateArg(Args, OPT_gsimple_template_names_EQ, "simple", SA); else if (TNK == codegenoptions::DebugTemplateNamesKind::Mangled) GenerateArg(Args, OPT_gsimple_template_names_EQ, "mangled", SA); } // ProfileInstrumentUsePath is marshalled automatically, no need to generate // it or PGOUseInstrumentor. if (Opts.TimePasses) { if (Opts.TimePassesPerRun) GenerateArg(Args, OPT_ftime_report_EQ, "per-pass-run", SA); else GenerateArg(Args, OPT_ftime_report, SA); } if (Opts.PrepareForLTO && !Opts.PrepareForThinLTO) GenerateArg(Args, OPT_flto_EQ, "full", SA); if (Opts.PrepareForThinLTO) GenerateArg(Args, OPT_flto_EQ, "thin", SA); if (!Opts.ThinLTOIndexFile.empty()) GenerateArg(Args, OPT_fthinlto_index_EQ, Opts.ThinLTOIndexFile, SA); if (Opts.SaveTempsFilePrefix == OutputFile) GenerateArg(Args, OPT_save_temps_EQ, "obj", SA); StringRef MemProfileBasename("memprof.profraw"); if (!Opts.MemoryProfileOutput.empty()) { if (Opts.MemoryProfileOutput == MemProfileBasename) { GenerateArg(Args, OPT_fmemory_profile, SA); } else { size_t ArgLength = Opts.MemoryProfileOutput.size() - MemProfileBasename.size(); GenerateArg(Args, OPT_fmemory_profile_EQ, Opts.MemoryProfileOutput.substr(0, ArgLength), SA); } } if (memcmp(Opts.CoverageVersion, "408*", 4) != 0) GenerateArg(Args, OPT_coverage_version_EQ, StringRef(Opts.CoverageVersion, 4), SA); // TODO: Check if we need to generate arguments stored in CmdArgs. (Namely // '-fembed_bitcode', which does not map to any CompilerInvocation field and // won't be generated.) if (Opts.XRayInstrumentationBundle.Mask != XRayInstrKind::All) { std::string InstrBundle = serializeXRayInstrumentationBundle(Opts.XRayInstrumentationBundle); if (!InstrBundle.empty()) GenerateArg(Args, OPT_fxray_instrumentation_bundle, InstrBundle, SA); } if (Opts.CFProtectionReturn && Opts.CFProtectionBranch) GenerateArg(Args, OPT_fcf_protection_EQ, "full", SA); else if (Opts.CFProtectionReturn) GenerateArg(Args, OPT_fcf_protection_EQ, "return", SA); else if (Opts.CFProtectionBranch) GenerateArg(Args, OPT_fcf_protection_EQ, "branch", SA); for (const auto &F : Opts.LinkBitcodeFiles) { bool Builtint = F.LinkFlags == llvm::Linker::Flags::LinkOnlyNeeded && F.PropagateAttrs && F.Internalize; GenerateArg(Args, Builtint ? OPT_mlink_builtin_bitcode : OPT_mlink_bitcode_file, F.Filename, SA); } // TODO: Consider removing marshalling annotations from f[no_]emulated_tls. // That would make it easy to generate the option only **once** if it was // explicitly set to non-default value. if (Opts.ExplicitEmulatedTLS) { GenerateArg( Args, Opts.EmulatedTLS ? 
OPT_femulated_tls : OPT_fno_emulated_tls, SA); } if (Opts.FPDenormalMode != llvm::DenormalMode::getIEEE()) GenerateArg(Args, OPT_fdenormal_fp_math_EQ, Opts.FPDenormalMode.str(), SA); if (Opts.FP32DenormalMode != llvm::DenormalMode::getIEEE()) GenerateArg(Args, OPT_fdenormal_fp_math_f32_EQ, Opts.FP32DenormalMode.str(), SA); if (Opts.StructReturnConvention == CodeGenOptions::SRCK_OnStack) { OptSpecifier Opt = T.isPPC32() ? OPT_maix_struct_return : OPT_fpcc_struct_return; GenerateArg(Args, Opt, SA); } else if (Opts.StructReturnConvention == CodeGenOptions::SRCK_InRegs) { OptSpecifier Opt = T.isPPC32() ? OPT_msvr4_struct_return : OPT_freg_struct_return; GenerateArg(Args, Opt, SA); } if (Opts.EnableAIXExtendedAltivecABI) GenerateArg(Args, OPT_mabi_EQ_vec_extabi, SA); if (!Opts.OptRecordPasses.empty()) GenerateArg(Args, OPT_opt_record_passes, Opts.OptRecordPasses, SA); if (!Opts.OptRecordFormat.empty()) GenerateArg(Args, OPT_opt_record_format, Opts.OptRecordFormat, SA); GenerateOptimizationRemark(Args, SA, OPT_Rpass_EQ, "pass", Opts.OptimizationRemark); GenerateOptimizationRemark(Args, SA, OPT_Rpass_missed_EQ, "pass-missed", Opts.OptimizationRemarkMissed); GenerateOptimizationRemark(Args, SA, OPT_Rpass_analysis_EQ, "pass-analysis", Opts.OptimizationRemarkAnalysis); GenerateArg(Args, OPT_fdiagnostics_hotness_threshold_EQ, Opts.DiagnosticsHotnessThreshold ? Twine(*Opts.DiagnosticsHotnessThreshold) : "auto", SA); for (StringRef Sanitizer : serializeSanitizerKinds(Opts.SanitizeRecover)) GenerateArg(Args, OPT_fsanitize_recover_EQ, Sanitizer, SA); for (StringRef Sanitizer : serializeSanitizerKinds(Opts.SanitizeTrap)) GenerateArg(Args, OPT_fsanitize_trap_EQ, Sanitizer, SA); if (!Opts.EmitVersionIdentMetadata) GenerateArg(Args, OPT_Qn, SA); switch (Opts.FiniteLoops) { case CodeGenOptions::FiniteLoopsKind::Language: break; case CodeGenOptions::FiniteLoopsKind::Always: GenerateArg(Args, OPT_ffinite_loops, SA); break; case CodeGenOptions::FiniteLoopsKind::Never: GenerateArg(Args, OPT_fno_finite_loops, SA); break; } } bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, DiagnosticsEngine &Diags, const llvm::Triple &T, const std::string &OutputFile, const LangOptions &LangOptsRef) { unsigned NumErrorsBefore = Diags.getNumErrors(); unsigned OptimizationLevel = getOptimizationLevel(Args, IK, Diags); // TODO: This could be done in Driver unsigned MaxOptLevel = 3; if (OptimizationLevel > MaxOptLevel) { // If the optimization level is not supported, fall back on the default // optimization Diags.Report(diag::warn_drv_optimization_value) << Args.getLastArg(OPT_O)->getAsString(Args) << "-O" << MaxOptLevel; OptimizationLevel = MaxOptLevel; } Opts.OptimizationLevel = OptimizationLevel; // The key paths of codegen options defined in Options.td start with // "CodeGenOpts.". Let's provide the expected variable name and type. CodeGenOptions &CodeGenOpts = Opts; // Some codegen options depend on language options. Let's provide the expected // variable name and type. 
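// For example (a rough sketch, not a literal macro expansion), an entry whose
// keypath is "CodeGenOpts.FunctionSections" behaves like:
//
//   CodeGenOpts.FunctionSections =
//       mergeForwardValue(CodeGenOpts.FunctionSections, /*Default=*/false);
//   if (auto MaybeValue =
//           normalizeSimpleFlag(OPT_ffunction_sections, /*TableIndex=*/0,
//                               Args, Diags))
//     CodeGenOpts.FunctionSections = mergeForwardValue(
//         CodeGenOpts.FunctionSections, *MaybeValue);
//
// which is why the expected variable names must be in scope here.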
const LangOptions *LangOpts = &LangOptsRef; #define CODEGEN_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef CODEGEN_OPTION_WITH_MARSHALLING // At O0 we want to fully disable inlining outside of cases marked with // 'alwaysinline' that are required for correctness. Opts.setInlining((Opts.OptimizationLevel == 0) ? CodeGenOptions::OnlyAlwaysInlining : CodeGenOptions::NormalInlining); // Explicit inlining flags can disable some or all inlining even at // optimization levels above zero. if (Arg *InlineArg = Args.getLastArg( options::OPT_finline_functions, options::OPT_finline_hint_functions, options::OPT_fno_inline_functions, options::OPT_fno_inline)) { if (Opts.OptimizationLevel > 0) { const Option &InlineOpt = InlineArg->getOption(); if (InlineOpt.matches(options::OPT_finline_functions)) Opts.setInlining(CodeGenOptions::NormalInlining); else if (InlineOpt.matches(options::OPT_finline_hint_functions)) Opts.setInlining(CodeGenOptions::OnlyHintInlining); else Opts.setInlining(CodeGenOptions::OnlyAlwaysInlining); } } // PIC defaults to -fno-direct-access-external-data while non-PIC defaults to // -fdirect-access-external-data. Opts.DirectAccessExternalData = Args.hasArg(OPT_fdirect_access_external_data) || (!Args.hasArg(OPT_fno_direct_access_external_data) && LangOpts->PICLevel == 0); if (Arg *A = Args.getLastArg(OPT_debug_info_kind_EQ)) { unsigned Val = llvm::StringSwitch(A->getValue()) .Case("line-tables-only", codegenoptions::DebugLineTablesOnly) .Case("line-directives-only", codegenoptions::DebugDirectivesOnly) .Case("constructor", codegenoptions::DebugInfoConstructor) .Case("limited", codegenoptions::LimitedDebugInfo) .Case("standalone", codegenoptions::FullDebugInfo) .Case("unused-types", codegenoptions::UnusedTypeInfo) .Default(~0U); if (Val == ~0U) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); else Opts.setDebugInfo(static_cast(Val)); } // If -fuse-ctor-homing is set and limited debug info is already on, then use // constructor homing, and vice versa for -fno-use-ctor-homing. 
if (const Arg *A = Args.getLastArg(OPT_fuse_ctor_homing, OPT_fno_use_ctor_homing)) { if (A->getOption().matches(OPT_fuse_ctor_homing) && Opts.getDebugInfo() == codegenoptions::LimitedDebugInfo) Opts.setDebugInfo(codegenoptions::DebugInfoConstructor); if (A->getOption().matches(OPT_fno_use_ctor_homing) && Opts.getDebugInfo() == codegenoptions::DebugInfoConstructor) Opts.setDebugInfo(codegenoptions::LimitedDebugInfo); } for (const auto &Arg : Args.getAllArgValues(OPT_fdebug_prefix_map_EQ)) { auto Split = StringRef(Arg).split('='); Opts.DebugPrefixMap.insert( {std::string(Split.first), std::string(Split.second)}); } for (const auto &Arg : Args.getAllArgValues(OPT_fcoverage_prefix_map_EQ)) { auto Split = StringRef(Arg).split('='); Opts.CoveragePrefixMap.insert( {std::string(Split.first), std::string(Split.second)}); } const llvm::Triple::ArchType DebugEntryValueArchs[] = { llvm::Triple::x86, llvm::Triple::x86_64, llvm::Triple::aarch64, llvm::Triple::arm, llvm::Triple::armeb, llvm::Triple::mips, llvm::Triple::mipsel, llvm::Triple::mips64, llvm::Triple::mips64el}; if (Opts.OptimizationLevel > 0 && Opts.hasReducedDebugInfo() && llvm::is_contained(DebugEntryValueArchs, T.getArch())) Opts.EmitCallSiteInfo = true; if (!Opts.EnableDIPreservationVerify && Opts.DIBugsReportFilePath.size()) { Diags.Report(diag::warn_ignoring_verify_debuginfo_preserve_export) << Opts.DIBugsReportFilePath; Opts.DIBugsReportFilePath = ""; } Opts.NewStructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa) && Args.hasArg(OPT_new_struct_path_tbaa); Opts.OptimizeSize = getOptimizationLevelSize(Args); Opts.SimplifyLibCalls = !LangOpts->NoBuiltin; if (Opts.SimplifyLibCalls) Opts.NoBuiltinFuncs = LangOpts->NoBuiltinFuncs; Opts.UnrollLoops = Args.hasFlag(OPT_funroll_loops, OPT_fno_unroll_loops, (Opts.OptimizationLevel > 1)); Opts.BinutilsVersion = std::string(Args.getLastArgValue(OPT_fbinutils_version_EQ)); Opts.DebugNameTable = static_cast( Args.hasArg(OPT_ggnu_pubnames) ? llvm::DICompileUnit::DebugNameTableKind::GNU : Args.hasArg(OPT_gpubnames) ? llvm::DICompileUnit::DebugNameTableKind::Default : llvm::DICompileUnit::DebugNameTableKind::None); if (const Arg *A = Args.getLastArg(OPT_gsimple_template_names_EQ)) { StringRef Value = A->getValue(); if (Value != "simple" && Value != "mangled") Diags.Report(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << A->getValue(); Opts.setDebugSimpleTemplateNames( StringRef(A->getValue()) == "simple" ? codegenoptions::DebugTemplateNamesKind::Simple : codegenoptions::DebugTemplateNamesKind::Mangled); } if (!Opts.ProfileInstrumentUsePath.empty()) setPGOUseInstrumentor(Opts, Opts.ProfileInstrumentUsePath); if (const Arg *A = Args.getLastArg(OPT_ftime_report, OPT_ftime_report_EQ)) { Opts.TimePasses = true; // -ftime-report= is only for new pass manager. 
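// (For instance, '-ftime-report=per-pass-run' selects per-run timers and, per
// the check below, is only accepted together with -fno-legacy-pass-manager.)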
if (A->getOption().getID() == OPT_ftime_report_EQ) { if (Opts.LegacyPassManager) Diags.Report(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) << "-fno-legacy-pass-manager"; StringRef Val = A->getValue(); if (Val == "per-pass") Opts.TimePassesPerRun = false; else if (Val == "per-pass-run") Opts.TimePassesPerRun = true; else Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } } Opts.PrepareForLTO = false; Opts.PrepareForThinLTO = false; if (Arg *A = Args.getLastArg(OPT_flto_EQ)) { Opts.PrepareForLTO = true; StringRef S = A->getValue(); if (S == "thin") Opts.PrepareForThinLTO = true; else if (S != "full") Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << S; } if (Arg *A = Args.getLastArg(OPT_fthinlto_index_EQ)) { if (IK.getLanguage() != Language::LLVM_IR) Diags.Report(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) << "-x ir"; Opts.ThinLTOIndexFile = std::string(Args.getLastArgValue(OPT_fthinlto_index_EQ)); } if (Arg *A = Args.getLastArg(OPT_save_temps_EQ)) Opts.SaveTempsFilePrefix = llvm::StringSwitch(A->getValue()) .Case("obj", OutputFile) .Default(llvm::sys::path::filename(OutputFile).str()); // The memory profile runtime appends the pid to make this name more unique. const char *MemProfileBasename = "memprof.profraw"; if (Args.hasArg(OPT_fmemory_profile_EQ)) { SmallString<128> Path( std::string(Args.getLastArgValue(OPT_fmemory_profile_EQ))); llvm::sys::path::append(Path, MemProfileBasename); Opts.MemoryProfileOutput = std::string(Path); } else if (Args.hasArg(OPT_fmemory_profile)) Opts.MemoryProfileOutput = MemProfileBasename; memcpy(Opts.CoverageVersion, "408*", 4); if (Opts.EmitGcovArcs || Opts.EmitGcovNotes) { if (Args.hasArg(OPT_coverage_version_EQ)) { StringRef CoverageVersion = Args.getLastArgValue(OPT_coverage_version_EQ); if (CoverageVersion.size() != 4) { Diags.Report(diag::err_drv_invalid_value) << Args.getLastArg(OPT_coverage_version_EQ)->getAsString(Args) << CoverageVersion; } else { memcpy(Opts.CoverageVersion, CoverageVersion.data(), 4); } } } // FIXME: For backend options that are not yet recorded as function // attributes in the IR, keep track of them so we can embed them in a // separate data section and use them when building the bitcode. for (const auto &A : Args) { // Do not encode output and input. if (A->getOption().getID() == options::OPT_o || A->getOption().getID() == options::OPT_INPUT || A->getOption().getID() == options::OPT_x || A->getOption().getID() == options::OPT_fembed_bitcode || A->getOption().matches(options::OPT_W_Group)) continue; ArgStringList ASL; A->render(Args, ASL); for (const auto &arg : ASL) { StringRef ArgStr(arg); Opts.CmdArgs.insert(Opts.CmdArgs.end(), ArgStr.begin(), ArgStr.end()); // using \00 to separate each commandline options. 
Opts.CmdArgs.push_back('\0'); } } auto XRayInstrBundles = Args.getAllArgValues(OPT_fxray_instrumentation_bundle); if (XRayInstrBundles.empty()) Opts.XRayInstrumentationBundle.Mask = XRayInstrKind::All; else for (const auto &A : XRayInstrBundles) parseXRayInstrumentationBundle("-fxray-instrumentation-bundle=", A, Args, Diags, Opts.XRayInstrumentationBundle); if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) { StringRef Name = A->getValue(); if (Name == "full") { Opts.CFProtectionReturn = 1; Opts.CFProtectionBranch = 1; } else if (Name == "return") Opts.CFProtectionReturn = 1; else if (Name == "branch") Opts.CFProtectionBranch = 1; else if (Name != "none") Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } if (Opts.PrepareForLTO && Args.hasArg(OPT_mibt_seal)) Opts.IBTSeal = 1; for (auto *A : Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_builtin_bitcode)) { CodeGenOptions::BitcodeFileToLink F; F.Filename = A->getValue(); if (A->getOption().matches(OPT_mlink_builtin_bitcode)) { F.LinkFlags = llvm::Linker::Flags::LinkOnlyNeeded; // When linking CUDA bitcode, propagate function attributes so that // e.g. libdevice gets fast-math attrs if we're building with fast-math. F.PropagateAttrs = true; F.Internalize = true; } Opts.LinkBitcodeFiles.push_back(F); } if (Args.getLastArg(OPT_femulated_tls) || Args.getLastArg(OPT_fno_emulated_tls)) { Opts.ExplicitEmulatedTLS = true; } if (Arg *A = Args.getLastArg(OPT_ftlsmodel_EQ)) { if (T.isOSAIX()) { StringRef Name = A->getValue(); if (Name != "global-dynamic") Diags.Report(diag::err_aix_unsupported_tls_model) << Name; } } if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_EQ)) { StringRef Val = A->getValue(); Opts.FPDenormalMode = llvm::parseDenormalFPAttribute(Val); if (!Opts.FPDenormalMode.isValid()) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_f32_EQ)) { StringRef Val = A->getValue(); Opts.FP32DenormalMode = llvm::parseDenormalFPAttribute(Val); if (!Opts.FP32DenormalMode.isValid()) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } // X86_32 has -fppc-struct-return and -freg-struct-return. // PPC32 has -maix-struct-return and -msvr4-struct-return. if (Arg *A = Args.getLastArg(OPT_fpcc_struct_return, OPT_freg_struct_return, OPT_maix_struct_return, OPT_msvr4_struct_return)) { // TODO: We might want to consider enabling these options on AIX in the // future. 
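// (Illustration: passing '-freg-struct-return' while targeting an AIX triple
// currently lands in the diagnostic below, since none of the four
// struct-return spellings are supported on AIX yet.)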
if (T.isOSAIX()) Diags.Report(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << T.str(); const Option &O = A->getOption(); if (O.matches(OPT_fpcc_struct_return) || O.matches(OPT_maix_struct_return)) { Opts.setStructReturnConvention(CodeGenOptions::SRCK_OnStack); } else { assert(O.matches(OPT_freg_struct_return) || O.matches(OPT_msvr4_struct_return)); Opts.setStructReturnConvention(CodeGenOptions::SRCK_InRegs); } } if (Arg *A = Args.getLastArg(OPT_mabi_EQ_vec_default, OPT_mabi_EQ_vec_extabi)) { if (!T.isOSAIX()) Diags.Report(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << T.str(); const Option &O = A->getOption(); Opts.EnableAIXExtendedAltivecABI = O.matches(OPT_mabi_EQ_vec_extabi); } bool NeedLocTracking = false; if (!Opts.OptRecordFile.empty()) NeedLocTracking = true; if (Arg *A = Args.getLastArg(OPT_opt_record_passes)) { Opts.OptRecordPasses = A->getValue(); NeedLocTracking = true; } if (Arg *A = Args.getLastArg(OPT_opt_record_format)) { Opts.OptRecordFormat = A->getValue(); NeedLocTracking = true; } Opts.OptimizationRemark = ParseOptimizationRemark(Diags, Args, OPT_Rpass_EQ, "pass"); Opts.OptimizationRemarkMissed = ParseOptimizationRemark(Diags, Args, OPT_Rpass_missed_EQ, "pass-missed"); Opts.OptimizationRemarkAnalysis = ParseOptimizationRemark( Diags, Args, OPT_Rpass_analysis_EQ, "pass-analysis"); NeedLocTracking |= Opts.OptimizationRemark.hasValidPattern() || Opts.OptimizationRemarkMissed.hasValidPattern() || Opts.OptimizationRemarkAnalysis.hasValidPattern(); bool UsingSampleProfile = !Opts.SampleProfileFile.empty(); bool UsingProfile = UsingSampleProfile || (Opts.getProfileUse() != CodeGenOptions::ProfileNone); if (Opts.DiagnosticsWithHotness && !UsingProfile && // An IR file will contain PGO as metadata IK.getLanguage() != Language::LLVM_IR) Diags.Report(diag::warn_drv_diagnostics_hotness_requires_pgo) << "-fdiagnostics-show-hotness"; // Parse remarks hotness threshold. Valid value is either integer or 'auto'. if (auto *arg = Args.getLastArg(options::OPT_fdiagnostics_hotness_threshold_EQ)) { auto ResultOrErr = llvm::remarks::parseHotnessThresholdOption(arg->getValue()); if (!ResultOrErr) { Diags.Report(diag::err_drv_invalid_diagnotics_hotness_threshold) << "-fdiagnostics-hotness-threshold="; } else { Opts.DiagnosticsHotnessThreshold = *ResultOrErr; if ((!Opts.DiagnosticsHotnessThreshold.hasValue() || Opts.DiagnosticsHotnessThreshold.getValue() > 0) && !UsingProfile) Diags.Report(diag::warn_drv_diagnostics_hotness_requires_pgo) << "-fdiagnostics-hotness-threshold="; } } // If the user requested to use a sample profile for PGO, then the // backend will need to track source location information so the profile // can be incorporated into the IR. if (UsingSampleProfile) NeedLocTracking = true; if (!Opts.StackUsageOutput.empty()) NeedLocTracking = true; // If the user requested a flag that requires source locations available in // the backend, make sure that the backend tracks source location information. if (NeedLocTracking && Opts.getDebugInfo() == codegenoptions::NoDebugInfo) Opts.setDebugInfo(codegenoptions::LocTrackingOnly); // Parse -fsanitize-recover= arguments. // FIXME: Report unrecoverable sanitizers incorrectly specified here. 
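// (For example, '-fsanitize-recover=address,undefined' is split into
// individual sanitizer kinds by the call below; an unknown name produces
// err_drv_invalid_value.)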
parseSanitizerKinds("-fsanitize-recover=", Args.getAllArgValues(OPT_fsanitize_recover_EQ), Diags, Opts.SanitizeRecover); parseSanitizerKinds("-fsanitize-trap=", Args.getAllArgValues(OPT_fsanitize_trap_EQ), Diags, Opts.SanitizeTrap); Opts.EmitVersionIdentMetadata = Args.hasFlag(OPT_Qy, OPT_Qn, true); if (Args.hasArg(options::OPT_ffinite_loops)) Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Always; else if (Args.hasArg(options::OPT_fno_finite_loops)) Opts.FiniteLoops = CodeGenOptions::FiniteLoopsKind::Never; Opts.EmitIEEENaNCompliantInsts = Args.hasFlag(options::OPT_mamdgpu_ieee, options::OPT_mno_amdgpu_ieee); if (!Opts.EmitIEEENaNCompliantInsts && !LangOptsRef.NoHonorNaNs) Diags.Report(diag::err_drv_amdgpu_ieee_without_no_honor_nans); return Diags.getNumErrors() == NumErrorsBefore; } static void GenerateDependencyOutputArgs(const DependencyOutputOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const DependencyOutputOptions &DependencyOutputOpts = Opts; #define DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING if (Opts.ShowIncludesDest != ShowIncludesDestination::None) GenerateArg(Args, OPT_show_includes, SA); for (const auto &Dep : Opts.ExtraDeps) { switch (Dep.second) { case EDK_SanitizeIgnorelist: // Sanitizer ignorelist arguments are generated from LanguageOptions. continue; case EDK_ModuleFile: // Module file arguments are generated from FrontendOptions and // HeaderSearchOptions. continue; case EDK_ProfileList: // Profile list arguments are generated from LanguageOptions via the // marshalling infrastructure. continue; case EDK_DepFileEntry: GenerateArg(Args, OPT_fdepfile_entry, Dep.first, SA); break; } } } static bool ParseDependencyOutputArgs(DependencyOutputOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags, frontend::ActionKind Action, bool ShowLineMarkers) { unsigned NumErrorsBefore = Diags.getNumErrors(); DependencyOutputOptions &DependencyOutputOpts = Opts; #define DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef DEPENDENCY_OUTPUT_OPTION_WITH_MARSHALLING if (Args.hasArg(OPT_show_includes)) { // Writing both /showIncludes and preprocessor output to stdout // would produce interleaved output, so use stderr for /showIncludes. // This behaves the same as cl.exe, when /E, /EP or /P are passed. if (Action == frontend::PrintPreprocessedInput || !ShowLineMarkers) Opts.ShowIncludesDest = ShowIncludesDestination::Stderr; else Opts.ShowIncludesDest = ShowIncludesDestination::Stdout; } else { Opts.ShowIncludesDest = ShowIncludesDestination::None; } // Add sanitizer ignorelists as extra dependencies. 
  // They won't be discovered by the regular preprocessor, so we let
  // make/ninja know about this implicit dependency.
  if (!Args.hasArg(OPT_fno_sanitize_ignorelist)) {
    for (const auto *A : Args.filtered(OPT_fsanitize_ignorelist_EQ)) {
      StringRef Val = A->getValue();
      if (!Val.contains('='))
        Opts.ExtraDeps.emplace_back(std::string(Val), EDK_SanitizeIgnorelist);
    }
    if (Opts.IncludeSystemHeaders) {
      for (const auto *A :
           Args.filtered(OPT_fsanitize_system_ignorelist_EQ)) {
        StringRef Val = A->getValue();
        if (!Val.contains('='))
          Opts.ExtraDeps.emplace_back(std::string(Val),
                                      EDK_SanitizeIgnorelist);
      }
    }
  }

  // -fprofile-list= dependencies.
  for (const auto &Filename : Args.getAllArgValues(OPT_fprofile_list_EQ))
    Opts.ExtraDeps.emplace_back(Filename, EDK_ProfileList);

  // Propagate the extra dependencies.
  for (const auto *A : Args.filtered(OPT_fdepfile_entry))
    Opts.ExtraDeps.emplace_back(A->getValue(), EDK_DepFileEntry);

  // Only the -fmodule-file=<file> form.
  for (const auto *A : Args.filtered(OPT_fmodule_file)) {
    StringRef Val = A->getValue();
    if (!Val.contains('='))
      Opts.ExtraDeps.emplace_back(std::string(Val), EDK_ModuleFile);
  }

  return Diags.getNumErrors() == NumErrorsBefore;
}

static bool parseShowColorsArgs(const ArgList &Args, bool DefaultColor) {
  // Color diagnostics default to auto ("on" if the terminal supports them) in
  // the driver, but default to off in cc1, needing an explicit
  // OPT_fdiagnostics_color.
  // Support both clang's -f[no-]color-diagnostics and gcc's
  // -f[no-]diagnostics-colors[=never|always|auto].
  enum {
    Colors_On,
    Colors_Off,
    Colors_Auto
  } ShowColors = DefaultColor ? Colors_Auto : Colors_Off;
  for (auto *A : Args) {
    const Option &O = A->getOption();
    if (O.matches(options::OPT_fcolor_diagnostics) ||
        O.matches(options::OPT_fdiagnostics_color)) {
      ShowColors = Colors_On;
    } else if (O.matches(options::OPT_fno_color_diagnostics) ||
               O.matches(options::OPT_fno_diagnostics_color)) {
      ShowColors = Colors_Off;
    } else if (O.matches(options::OPT_fdiagnostics_color_EQ)) {
      StringRef Value(A->getValue());
      if (Value == "always")
        ShowColors = Colors_On;
      else if (Value == "never")
        ShowColors = Colors_Off;
      else if (Value == "auto")
        ShowColors = Colors_Auto;
    }
  }
  return ShowColors == Colors_On ||
         (ShowColors == Colors_Auto &&
          llvm::sys::Process::StandardErrHasColors());
}

static bool checkVerifyPrefixes(const std::vector<std::string> &VerifyPrefixes,
                                DiagnosticsEngine &Diags) {
  bool Success = true;
  for (const auto &Prefix : VerifyPrefixes) {
    // Every prefix must start with a letter and contain only alphanumeric
    // characters, hyphens, and underscores.
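    // (e.g. '-verify=expected,my-prefix' passes this check, while
    // '-verify=2fast' is rejected because the prefix starts with a digit.)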
auto BadChar = llvm::find_if(Prefix, [](char C) { return !isAlphanumeric(C) && C != '-' && C != '_'; }); if (BadChar != Prefix.end() || !isLetter(Prefix[0])) { Success = false; Diags.Report(diag::err_drv_invalid_value) << "-verify=" << Prefix; Diags.Report(diag::note_drv_verify_prefix_spelling); } } return Success; } static void GenerateFileSystemArgs(const FileSystemOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const FileSystemOptions &FileSystemOpts = Opts; #define FILE_SYSTEM_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef FILE_SYSTEM_OPTION_WITH_MARSHALLING } static bool ParseFileSystemArgs(FileSystemOptions &Opts, const ArgList &Args, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); FileSystemOptions &FileSystemOpts = Opts; #define FILE_SYSTEM_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef FILE_SYSTEM_OPTION_WITH_MARSHALLING return Diags.getNumErrors() == NumErrorsBefore; } static void GenerateMigratorArgs(const MigratorOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const MigratorOptions &MigratorOpts = Opts; #define MIGRATOR_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef MIGRATOR_OPTION_WITH_MARSHALLING } static bool ParseMigratorArgs(MigratorOptions &Opts, const ArgList &Args, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); MigratorOptions &MigratorOpts = Opts; #define MIGRATOR_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef MIGRATOR_OPTION_WITH_MARSHALLING return Diags.getNumErrors() == NumErrorsBefore; } void CompilerInvocation::GenerateDiagnosticArgs( const DiagnosticOptions &Opts, SmallVectorImpl &Args, StringAllocator SA, bool DefaultDiagColor) { const DiagnosticOptions *DiagnosticOpts = &Opts; #define 
DIAG_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef DIAG_OPTION_WITH_MARSHALLING if (!Opts.DiagnosticSerializationFile.empty()) GenerateArg(Args, OPT_diagnostic_serialized_file, Opts.DiagnosticSerializationFile, SA); if (Opts.ShowColors) GenerateArg(Args, OPT_fcolor_diagnostics, SA); if (Opts.VerifyDiagnostics && llvm::is_contained(Opts.VerifyPrefixes, "expected")) GenerateArg(Args, OPT_verify, SA); for (const auto &Prefix : Opts.VerifyPrefixes) if (Prefix != "expected") GenerateArg(Args, OPT_verify_EQ, Prefix, SA); DiagnosticLevelMask VIU = Opts.getVerifyIgnoreUnexpected(); if (VIU == DiagnosticLevelMask::None) { // This is the default, don't generate anything. } else if (VIU == DiagnosticLevelMask::All) { GenerateArg(Args, OPT_verify_ignore_unexpected, SA); } else { if (static_cast<unsigned>(VIU & DiagnosticLevelMask::Note) != 0) GenerateArg(Args, OPT_verify_ignore_unexpected_EQ, "note", SA); if (static_cast<unsigned>(VIU & DiagnosticLevelMask::Remark) != 0) GenerateArg(Args, OPT_verify_ignore_unexpected_EQ, "remark", SA); if (static_cast<unsigned>(VIU & DiagnosticLevelMask::Warning) != 0) GenerateArg(Args, OPT_verify_ignore_unexpected_EQ, "warning", SA); if (static_cast<unsigned>(VIU & DiagnosticLevelMask::Error) != 0) GenerateArg(Args, OPT_verify_ignore_unexpected_EQ, "error", SA); } for (const auto &Warning : Opts.Warnings) { // This option is automatically generated from UndefPrefixes. if (Warning == "undef-prefix") continue; Args.push_back(SA(StringRef("-W") + Warning)); } for (const auto &Remark : Opts.Remarks) { // These arguments are generated from OptimizationRemark fields of // CodeGenOptions. StringRef IgnoredRemarks[] = {"pass", "no-pass", "pass-analysis", "no-pass-analysis", "pass-missed", "no-pass-missed"}; if (llvm::is_contained(IgnoredRemarks, Remark)) continue; Args.push_back(SA(StringRef("-R") + Remark)); } } std::unique_ptr<DiagnosticOptions> clang::CreateAndPopulateDiagOpts(ArrayRef<const char *> Argv) { auto DiagOpts = std::make_unique<DiagnosticOptions>(); unsigned MissingArgIndex, MissingArgCount; InputArgList Args = getDriverOptTable().ParseArgs( Argv.slice(1), MissingArgIndex, MissingArgCount); // We ignore MissingArgCount and the return value of ParseDiagnosticArgs. // Any errors that would be diagnosed here will also be diagnosed later, // when the DiagnosticsEngine actually exists. (void)ParseDiagnosticArgs(*DiagOpts, Args); return DiagOpts; } bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args, DiagnosticsEngine *Diags, bool DefaultDiagColor) { Optional<DiagnosticsEngine> IgnoringDiags; if (!Diags) { IgnoringDiags.emplace(new DiagnosticIDs(), new DiagnosticOptions(), new IgnoringDiagConsumer()); Diags = &*IgnoringDiags; } unsigned NumErrorsBefore = Diags->getNumErrors(); // The key paths of diagnostic options defined in Options.td start with // "DiagnosticOpts->". Let's provide the expected variable name and type.
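// (Illustrative sketch: a marshalling record in Options.td whose KEYPATH is spelled "DiagnosticOpts->SomeField" expands against the local `DiagnosticOpts` pointer declared below; "SomeField" stands in for a real DiagnosticOptions member.)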
DiagnosticOptions *DiagnosticOpts = &Opts; #define DIAG_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, *Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef DIAG_OPTION_WITH_MARSHALLING llvm::sys::Process::UseANSIEscapeCodes(Opts.UseANSIEscapeCodes); if (Arg *A = Args.getLastArg(OPT_diagnostic_serialized_file, OPT__serialize_diags)) Opts.DiagnosticSerializationFile = A->getValue(); Opts.ShowColors = parseShowColorsArgs(Args, DefaultDiagColor); Opts.VerifyDiagnostics = Args.hasArg(OPT_verify) || Args.hasArg(OPT_verify_EQ); Opts.VerifyPrefixes = Args.getAllArgValues(OPT_verify_EQ); if (Args.hasArg(OPT_verify)) Opts.VerifyPrefixes.push_back("expected"); // Keep VerifyPrefixes in its original order for the sake of diagnostics, and // then sort it to prepare for fast lookup using std::binary_search. if (!checkVerifyPrefixes(Opts.VerifyPrefixes, *Diags)) Opts.VerifyDiagnostics = false; else llvm::sort(Opts.VerifyPrefixes); DiagnosticLevelMask DiagMask = DiagnosticLevelMask::None; parseDiagnosticLevelMask( "-verify-ignore-unexpected=", Args.getAllArgValues(OPT_verify_ignore_unexpected_EQ), *Diags, DiagMask); if (Args.hasArg(OPT_verify_ignore_unexpected)) DiagMask = DiagnosticLevelMask::All; Opts.setVerifyIgnoreUnexpected(DiagMask); if (Opts.TabStop == 0 || Opts.TabStop > DiagnosticOptions::MaxTabStop) { Opts.TabStop = DiagnosticOptions::DefaultTabStop; Diags->Report(diag::warn_ignoring_ftabstop_value) << Opts.TabStop << DiagnosticOptions::DefaultTabStop; } addDiagnosticArgs(Args, OPT_W_Group, OPT_W_value_Group, Opts.Warnings); addDiagnosticArgs(Args, OPT_R_Group, OPT_R_value_Group, Opts.Remarks); return Diags->getNumErrors() == NumErrorsBefore; } /// Parse the argument to the -ftest-module-file-extension /// command-line argument. /// /// \returns true on error, false on success. static bool parseTestModuleFileExtensionArg(StringRef Arg, std::string &BlockName, unsigned &MajorVersion, unsigned &MinorVersion, bool &Hashed, std::string &UserInfo) { SmallVector<StringRef, 5> Args; Arg.split(Args, ':', 5); if (Args.size() < 5) return true; BlockName = std::string(Args[0]); if (Args[1].getAsInteger(10, MajorVersion)) return true; if (Args[2].getAsInteger(10, MinorVersion)) return true; if (Args[3].getAsInteger(2, Hashed)) return true; if (Args.size() > 4) UserInfo = std::string(Args[4]); return false; } /// Return a table that associates command line option specifiers with the /// frontend action. Note: The pair {frontend::PluginAction, OPT_plugin} is /// intentionally missing, as this case is handled separately from other /// frontend options.
static const auto &getFrontendActionTable() { static const std::pair<frontend::ActionKind, unsigned> Table[] = { {frontend::ASTDeclList, OPT_ast_list}, {frontend::ASTDump, OPT_ast_dump_all_EQ}, {frontend::ASTDump, OPT_ast_dump_all}, {frontend::ASTDump, OPT_ast_dump_EQ}, {frontend::ASTDump, OPT_ast_dump}, {frontend::ASTDump, OPT_ast_dump_lookups}, {frontend::ASTDump, OPT_ast_dump_decl_types}, {frontend::ASTPrint, OPT_ast_print}, {frontend::ASTView, OPT_ast_view}, {frontend::DumpCompilerOptions, OPT_compiler_options_dump}, {frontend::DumpRawTokens, OPT_dump_raw_tokens}, {frontend::DumpTokens, OPT_dump_tokens}, {frontend::EmitAssembly, OPT_S}, {frontend::EmitBC, OPT_emit_llvm_bc}, {frontend::EmitHTML, OPT_emit_html}, {frontend::EmitLLVM, OPT_emit_llvm}, {frontend::EmitLLVMOnly, OPT_emit_llvm_only}, {frontend::EmitCodeGenOnly, OPT_emit_codegen_only}, {frontend::EmitObj, OPT_emit_obj}, {frontend::ExtractAPI, OPT_extract_api}, {frontend::FixIt, OPT_fixit_EQ}, {frontend::FixIt, OPT_fixit}, {frontend::GenerateModule, OPT_emit_module}, {frontend::GenerateModuleInterface, OPT_emit_module_interface}, {frontend::GenerateHeaderModule, OPT_emit_header_module}, {frontend::GeneratePCH, OPT_emit_pch}, {frontend::GenerateInterfaceStubs, OPT_emit_interface_stubs}, {frontend::InitOnly, OPT_init_only}, {frontend::ParseSyntaxOnly, OPT_fsyntax_only}, {frontend::ModuleFileInfo, OPT_module_file_info}, {frontend::VerifyPCH, OPT_verify_pch}, {frontend::PrintPreamble, OPT_print_preamble}, {frontend::PrintPreprocessedInput, OPT_E}, {frontend::TemplightDump, OPT_templight_dump}, {frontend::RewriteMacros, OPT_rewrite_macros}, {frontend::RewriteObjC, OPT_rewrite_objc}, {frontend::RewriteTest, OPT_rewrite_test}, {frontend::RunAnalysis, OPT_analyze}, {frontend::MigrateSource, OPT_migrate}, {frontend::RunPreprocessorOnly, OPT_Eonly}, {frontend::PrintDependencyDirectivesSourceMinimizerOutput, OPT_print_dependency_directives_minimized_source}, }; return Table; } /// Maps command line option to frontend action. static Optional<frontend::ActionKind> getFrontendAction(OptSpecifier &Opt) { for (const auto &ActionOpt : getFrontendActionTable()) if (ActionOpt.second == Opt.getID()) return ActionOpt.first; return None; } /// Maps frontend action to command line option. static Optional<OptSpecifier> getProgramActionOpt(frontend::ActionKind ProgramAction) { for (const auto &ActionOpt : getFrontendActionTable()) if (ActionOpt.first == ProgramAction) return OptSpecifier(ActionOpt.second); return None; } static void GenerateFrontendArgs(const FrontendOptions &Opts, SmallVectorImpl<const char *> &Args, CompilerInvocation::StringAllocator SA, bool IsHeader) { const FrontendOptions &FrontendOpts = Opts; #define FRONTEND_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef FRONTEND_OPTION_WITH_MARSHALLING Optional<OptSpecifier> ProgramActionOpt = getProgramActionOpt(Opts.ProgramAction); // Generating a simple flag covers most frontend actions. std::function<void()> GenerateProgramAction = [&]() { GenerateArg(Args, *ProgramActionOpt, SA); }; if (!ProgramActionOpt) { // PluginAction is the only program action handled separately.
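// (Illustrative: for a cc1 invocation of "-plugin my-plugin", ProgramAction is frontend::PluginAction and ActionName is "my-plugin", so the lambda below regenerates "-plugin my-plugin" rather than a dedicated action flag.)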
assert(Opts.ProgramAction == frontend::PluginAction && "Frontend action without option."); GenerateProgramAction = [&]() { GenerateArg(Args, OPT_plugin, Opts.ActionName, SA); }; } // FIXME: Simplify the complex 'AST dump' command line. if (Opts.ProgramAction == frontend::ASTDump) { GenerateProgramAction = [&]() { // ASTDumpLookups, ASTDumpDeclTypes and ASTDumpFilter are generated via // marshalling infrastructure. if (Opts.ASTDumpFormat != ADOF_Default) { StringRef Format; switch (Opts.ASTDumpFormat) { case ADOF_Default: llvm_unreachable("Default AST dump format."); case ADOF_JSON: Format = "json"; break; } if (Opts.ASTDumpAll) GenerateArg(Args, OPT_ast_dump_all_EQ, Format, SA); if (Opts.ASTDumpDecls) GenerateArg(Args, OPT_ast_dump_EQ, Format, SA); } else { if (Opts.ASTDumpAll) GenerateArg(Args, OPT_ast_dump_all, SA); if (Opts.ASTDumpDecls) GenerateArg(Args, OPT_ast_dump, SA); } }; } if (Opts.ProgramAction == frontend::FixIt && !Opts.FixItSuffix.empty()) { GenerateProgramAction = [&]() { GenerateArg(Args, OPT_fixit_EQ, Opts.FixItSuffix, SA); }; } GenerateProgramAction(); for (const auto &PluginArgs : Opts.PluginArgs) { Option Opt = getDriverOptTable().getOption(OPT_plugin_arg); const char *Spelling = SA(Opt.getPrefix() + Opt.getName() + PluginArgs.first); for (const auto &PluginArg : PluginArgs.second) denormalizeString(Args, Spelling, SA, Opt.getKind(), 0, PluginArg); } for (const auto &Ext : Opts.ModuleFileExtensions) if (auto *TestExt = dyn_cast_or_null(Ext.get())) GenerateArg(Args, OPT_ftest_module_file_extension_EQ, TestExt->str(), SA); if (!Opts.CodeCompletionAt.FileName.empty()) GenerateArg(Args, OPT_code_completion_at, Opts.CodeCompletionAt.ToString(), SA); for (const auto &Plugin : Opts.Plugins) GenerateArg(Args, OPT_load, Plugin, SA); // ASTDumpDecls and ASTDumpAll already handled with ProgramAction. for (const auto &ModuleFile : Opts.ModuleFiles) GenerateArg(Args, OPT_fmodule_file, ModuleFile, SA); if (Opts.AuxTargetCPU.hasValue()) GenerateArg(Args, OPT_aux_target_cpu, *Opts.AuxTargetCPU, SA); if (Opts.AuxTargetFeatures.hasValue()) for (const auto &Feature : *Opts.AuxTargetFeatures) GenerateArg(Args, OPT_aux_target_feature, Feature, SA); { StringRef Preprocessed = Opts.DashX.isPreprocessed() ? "-cpp-output" : ""; StringRef ModuleMap = Opts.DashX.getFormat() == InputKind::ModuleMap ? "-module-map" : ""; StringRef Header = IsHeader ? "-header" : ""; StringRef Lang; switch (Opts.DashX.getLanguage()) { case Language::C: Lang = "c"; break; case Language::OpenCL: Lang = "cl"; break; case Language::OpenCLCXX: Lang = "clcpp"; break; case Language::CUDA: Lang = "cuda"; break; case Language::HIP: Lang = "hip"; break; case Language::CXX: Lang = "c++"; break; case Language::ObjC: Lang = "objective-c"; break; case Language::ObjCXX: Lang = "objective-c++"; break; case Language::RenderScript: Lang = "renderscript"; break; case Language::Asm: Lang = "assembler-with-cpp"; break; case Language::Unknown: assert(Opts.DashX.getFormat() == InputKind::Precompiled && "Generating -x argument for unknown language (not precompiled)."); Lang = "ast"; break; case Language::LLVM_IR: Lang = "ir"; break; } GenerateArg(Args, OPT_x, Lang + Header + ModuleMap + Preprocessed, SA); } // OPT_INPUT has a unique class, generate it directly. 
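// (Illustrative: a preprocessed Objective-C input is emitted above as "-x objective-c-cpp-output", and the input files themselves follow as positional arguments below.)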
for (const auto &Input : Opts.Inputs) Args.push_back(SA(Input.getFile())); } static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags, bool &IsHeaderFile) { unsigned NumErrorsBefore = Diags.getNumErrors(); FrontendOptions &FrontendOpts = Opts; #define FRONTEND_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef FRONTEND_OPTION_WITH_MARSHALLING Opts.ProgramAction = frontend::ParseSyntaxOnly; if (const Arg *A = Args.getLastArg(OPT_Action_Group)) { OptSpecifier Opt = OptSpecifier(A->getOption().getID()); Optional ProgramAction = getFrontendAction(Opt); assert(ProgramAction && "Option specifier not in Action_Group."); if (ProgramAction == frontend::ASTDump && (Opt == OPT_ast_dump_all_EQ || Opt == OPT_ast_dump_EQ)) { unsigned Val = llvm::StringSwitch(A->getValue()) .CaseLower("default", ADOF_Default) .CaseLower("json", ADOF_JSON) .Default(std::numeric_limits::max()); if (Val != std::numeric_limits::max()) Opts.ASTDumpFormat = static_cast(Val); else { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); Opts.ASTDumpFormat = ADOF_Default; } } if (ProgramAction == frontend::FixIt && Opt == OPT_fixit_EQ) Opts.FixItSuffix = A->getValue(); if (ProgramAction == frontend::GenerateInterfaceStubs) { StringRef ArgStr = Args.hasArg(OPT_interface_stub_version_EQ) ? Args.getLastArgValue(OPT_interface_stub_version_EQ) : "ifs-v1"; if (ArgStr == "experimental-yaml-elf-v1" || ArgStr == "experimental-ifs-v1" || ArgStr == "experimental-ifs-v2" || ArgStr == "experimental-tapi-elf-v1") { std::string ErrorMessage = "Invalid interface stub format: " + ArgStr.str() + " is deprecated."; Diags.Report(diag::err_drv_invalid_value) << "Must specify a valid interface stub format type, ie: " "-interface-stub-version=ifs-v1" << ErrorMessage; ProgramAction = frontend::ParseSyntaxOnly; } else if (!ArgStr.startswith("ifs-")) { std::string ErrorMessage = "Invalid interface stub format: " + ArgStr.str() + "."; Diags.Report(diag::err_drv_invalid_value) << "Must specify a valid interface stub format type, ie: " "-interface-stub-version=ifs-v1" << ErrorMessage; ProgramAction = frontend::ParseSyntaxOnly; } } Opts.ProgramAction = *ProgramAction; } if (const Arg* A = Args.getLastArg(OPT_plugin)) { Opts.Plugins.emplace_back(A->getValue(0)); Opts.ProgramAction = frontend::PluginAction; Opts.ActionName = A->getValue(); } for (const auto *AA : Args.filtered(OPT_plugin_arg)) Opts.PluginArgs[AA->getValue(0)].emplace_back(AA->getValue(1)); for (const std::string &Arg : Args.getAllArgValues(OPT_ftest_module_file_extension_EQ)) { std::string BlockName; unsigned MajorVersion; unsigned MinorVersion; bool Hashed; std::string UserInfo; if (parseTestModuleFileExtensionArg(Arg, BlockName, MajorVersion, MinorVersion, Hashed, UserInfo)) { Diags.Report(diag::err_test_module_file_extension_format) << Arg; continue; } // Add the testing module file extension. 
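// (Illustrative: a value such as "clang.test:1:5:1:user info" parses to BlockName "clang.test", version 1.5, Hashed == true, and UserInfo "user info"; the example string is hypothetical.)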
Opts.ModuleFileExtensions.push_back( std::make_shared<TestModuleFileExtension>( BlockName, MajorVersion, MinorVersion, Hashed, UserInfo)); } if (const Arg *A = Args.getLastArg(OPT_code_completion_at)) { Opts.CodeCompletionAt = ParsedSourceLocation::FromString(A->getValue()); if (Opts.CodeCompletionAt.FileName.empty()) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } Opts.Plugins = Args.getAllArgValues(OPT_load); Opts.ASTDumpDecls = Args.hasArg(OPT_ast_dump, OPT_ast_dump_EQ); Opts.ASTDumpAll = Args.hasArg(OPT_ast_dump_all, OPT_ast_dump_all_EQ); // Only the -fmodule-file=<file> form. for (const auto *A : Args.filtered(OPT_fmodule_file)) { StringRef Val = A->getValue(); if (!Val.contains('=')) Opts.ModuleFiles.push_back(std::string(Val)); } if (Opts.ProgramAction != frontend::GenerateModule && Opts.IsSystemModule) Diags.Report(diag::err_drv_argument_only_allowed_with) << "-fsystem-module" << "-emit-module"; if (Args.hasArg(OPT_aux_target_cpu)) Opts.AuxTargetCPU = std::string(Args.getLastArgValue(OPT_aux_target_cpu)); if (Args.hasArg(OPT_aux_target_feature)) Opts.AuxTargetFeatures = Args.getAllArgValues(OPT_aux_target_feature); if (Opts.ARCMTAction != FrontendOptions::ARCMT_None && Opts.ObjCMTAction != FrontendOptions::ObjCMT_None) { Diags.Report(diag::err_drv_argument_not_allowed_with) << "ARC migration" << "ObjC migration"; } InputKind DashX(Language::Unknown); if (const Arg *A = Args.getLastArg(OPT_x)) { StringRef XValue = A->getValue(); // Parse suffixes: '(-header|[-module-map][-cpp-output])'. // FIXME: Supporting '-header-cpp-output' would be useful. bool Preprocessed = XValue.consume_back("-cpp-output"); bool ModuleMap = XValue.consume_back("-module-map"); IsHeaderFile = !Preprocessed && !ModuleMap && XValue != "precompiled-header" && XValue.consume_back("-header"); // Principal languages. DashX = llvm::StringSwitch<InputKind>(XValue) .Case("c", Language::C) .Case("cl", Language::OpenCL) .Case("clcpp", Language::OpenCLCXX) .Case("cuda", Language::CUDA) .Case("hip", Language::HIP) .Case("c++", Language::CXX) .Case("objective-c", Language::ObjC) .Case("objective-c++", Language::ObjCXX) .Case("renderscript", Language::RenderScript) .Default(Language::Unknown); // "objc[++]-cpp-output" is an acceptable synonym for // "objective-c[++]-cpp-output". if (DashX.isUnknown() && Preprocessed && !IsHeaderFile && !ModuleMap) DashX = llvm::StringSwitch<InputKind>(XValue) .Case("objc", Language::ObjC) .Case("objc++", Language::ObjCXX) .Default(Language::Unknown); // Some special cases cannot be combined with suffixes. if (DashX.isUnknown() && !Preprocessed && !ModuleMap && !IsHeaderFile) DashX = llvm::StringSwitch<InputKind>(XValue) .Case("cpp-output", InputKind(Language::C).getPreprocessed()) .Case("assembler-with-cpp", Language::Asm) .Cases("ast", "pcm", "precompiled-header", InputKind(Language::Unknown, InputKind::Precompiled)) .Case("ir", Language::LLVM_IR) .Default(Language::Unknown); if (DashX.isUnknown()) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); if (Preprocessed) DashX = DashX.getPreprocessed(); if (ModuleMap) DashX = DashX.withFormat(InputKind::ModuleMap); } // '-' is the default input if none is given. std::vector<std::string> Inputs = Args.getAllArgValues(OPT_INPUT); Opts.Inputs.clear(); if (Inputs.empty()) Inputs.push_back("-"); for (unsigned i = 0, e = Inputs.size(); i != e; ++i) { InputKind IK = DashX; if (IK.isUnknown()) { IK = FrontendOptions::getInputKindForExtension( StringRef(Inputs[i]).rsplit('.').second); // FIXME: Warn on this?
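// (Illustrative: "foo.cc" maps to a C++ source input through its extension, while an unrecognized extension such as ".zzz" stays unknown and hits the Language::C fallback below.)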
if (IK.isUnknown()) IK = Language::C; // FIXME: Remove this hack. if (i == 0) DashX = IK; } bool IsSystem = false; // The -emit-module action implicitly takes a module map. if (Opts.ProgramAction == frontend::GenerateModule && IK.getFormat() == InputKind::Source) { IK = IK.withFormat(InputKind::ModuleMap); IsSystem = Opts.IsSystemModule; } Opts.Inputs.emplace_back(std::move(Inputs[i]), IK, IsSystem); } Opts.DashX = DashX; return Diags.getNumErrors() == NumErrorsBefore; } std::string CompilerInvocation::GetResourcesPath(const char *Argv0, void *MainAddr) { std::string ClangExecutable = llvm::sys::fs::getMainExecutable(Argv0, MainAddr); return Driver::GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR); } static void GenerateHeaderSearchArgs(HeaderSearchOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const HeaderSearchOptions *HeaderSearchOpts = &Opts; #define HEADER_SEARCH_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef HEADER_SEARCH_OPTION_WITH_MARSHALLING if (Opts.UseLibcxx) GenerateArg(Args, OPT_stdlib_EQ, "libc++", SA); if (!Opts.ModuleCachePath.empty()) GenerateArg(Args, OPT_fmodules_cache_path, Opts.ModuleCachePath, SA); for (const auto &File : Opts.PrebuiltModuleFiles) GenerateArg(Args, OPT_fmodule_file, File.first + "=" + File.second, SA); for (const auto &Path : Opts.PrebuiltModulePaths) GenerateArg(Args, OPT_fprebuilt_module_path, Path, SA); for (const auto &Macro : Opts.ModulesIgnoreMacros) GenerateArg(Args, OPT_fmodules_ignore_macro, Macro.val(), SA); auto Matches = [](const HeaderSearchOptions::Entry &Entry, llvm::ArrayRef Groups, llvm::Optional IsFramework, llvm::Optional IgnoreSysRoot) { return llvm::is_contained(Groups, Entry.Group) && (!IsFramework || (Entry.IsFramework == *IsFramework)) && (!IgnoreSysRoot || (Entry.IgnoreSysRoot == *IgnoreSysRoot)); }; auto It = Opts.UserEntries.begin(); auto End = Opts.UserEntries.end(); // Add -I..., -F..., and -index-header-map options in order. for (; It < End && Matches(*It, {frontend::IndexHeaderMap, frontend::Angled}, None, true); ++It) { OptSpecifier Opt = [It, Matches]() { if (Matches(*It, frontend::IndexHeaderMap, true, true)) return OPT_F; if (Matches(*It, frontend::IndexHeaderMap, false, true)) return OPT_I; if (Matches(*It, frontend::Angled, true, true)) return OPT_F; if (Matches(*It, frontend::Angled, false, true)) return OPT_I; llvm_unreachable("Unexpected HeaderSearchOptions::Entry."); }(); if (It->Group == frontend::IndexHeaderMap) GenerateArg(Args, OPT_index_header_map, SA); GenerateArg(Args, Opt, It->Path, SA); }; // Note: some paths that came from "[-iprefix=xx] -iwithprefixbefore=yy" may // have already been generated as "-I[xx]yy". If that's the case, their // position on command line was such that this has no semantic impact on // include paths. for (; It < End && Matches(*It, {frontend::After, frontend::Angled}, false, true); ++It) { OptSpecifier Opt = It->Group == frontend::After ? 
OPT_iwithprefix : OPT_iwithprefixbefore; GenerateArg(Args, Opt, It->Path, SA); } // Note: Some paths that came from "-idirafter=xxyy" may have already been // generated as "-iwithprefix=xxyy". If that's the case, their position on // command line was such that this has no semantic impact on include paths. for (; It < End && Matches(*It, {frontend::After}, false, true); ++It) GenerateArg(Args, OPT_idirafter, It->Path, SA); for (; It < End && Matches(*It, {frontend::Quoted}, false, true); ++It) GenerateArg(Args, OPT_iquote, It->Path, SA); for (; It < End && Matches(*It, {frontend::System}, false, None); ++It) GenerateArg(Args, It->IgnoreSysRoot ? OPT_isystem : OPT_iwithsysroot, It->Path, SA); for (; It < End && Matches(*It, {frontend::System}, true, true); ++It) GenerateArg(Args, OPT_iframework, It->Path, SA); for (; It < End && Matches(*It, {frontend::System}, true, false); ++It) GenerateArg(Args, OPT_iframeworkwithsysroot, It->Path, SA); // Add the paths for the various language specific isystem flags. for (; It < End && Matches(*It, {frontend::CSystem}, false, true); ++It) GenerateArg(Args, OPT_c_isystem, It->Path, SA); for (; It < End && Matches(*It, {frontend::CXXSystem}, false, true); ++It) GenerateArg(Args, OPT_cxx_isystem, It->Path, SA); for (; It < End && Matches(*It, {frontend::ObjCSystem}, false, true); ++It) GenerateArg(Args, OPT_objc_isystem, It->Path, SA); for (; It < End && Matches(*It, {frontend::ObjCXXSystem}, false, true); ++It) GenerateArg(Args, OPT_objcxx_isystem, It->Path, SA); // Add the internal paths from a driver that detects standard include paths. // Note: Some paths that came from "-internal-isystem" arguments may have // already been generated as "-isystem". If that's the case, their position on // command line was such that this has no semantic impact on include paths. for (; It < End && Matches(*It, {frontend::System, frontend::ExternCSystem}, false, true); ++It) { OptSpecifier Opt = It->Group == frontend::System ? OPT_internal_isystem : OPT_internal_externc_isystem; GenerateArg(Args, Opt, It->Path, SA); } assert(It == End && "Unhandled HeaderSearchOption::Entry."); // Add the path prefixes which are implicitly treated as being system headers. for (const auto &P : Opts.SystemHeaderPrefixes) { OptSpecifier Opt = P.IsSystemHeader ? OPT_system_header_prefix : OPT_no_system_header_prefix; GenerateArg(Args, Opt, P.Prefix, SA); } for (const std::string &F : Opts.VFSOverlayFiles) GenerateArg(Args, OPT_ivfsoverlay, F, SA); } static bool ParseHeaderSearchArgs(HeaderSearchOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags, const std::string &WorkingDir) { unsigned NumErrorsBefore = Diags.getNumErrors(); HeaderSearchOptions *HeaderSearchOpts = &Opts; #define HEADER_SEARCH_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef HEADER_SEARCH_OPTION_WITH_MARSHALLING if (const Arg *A = Args.getLastArg(OPT_stdlib_EQ)) Opts.UseLibcxx = (strcmp(A->getValue(), "libc++") == 0); // Canonicalize -fmodules-cache-path before storing it. 
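// (Illustrative, assuming the file-system working directory is plumbed in as WorkingDir: with "-working-directory /tmp" and "-fmodules-cache-path=./cache", the stored path below becomes "/tmp/cache" after making it absolute and removing dots.)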
SmallString<128> P(Args.getLastArgValue(OPT_fmodules_cache_path)); if (!(P.empty() || llvm::sys::path::is_absolute(P))) { if (WorkingDir.empty()) llvm::sys::fs::make_absolute(P); else llvm::sys::fs::make_absolute(WorkingDir, P); } llvm::sys::path::remove_dots(P); Opts.ModuleCachePath = std::string(P.str()); // Only the -fmodule-file=<name>=<file> form. for (const auto *A : Args.filtered(OPT_fmodule_file)) { StringRef Val = A->getValue(); if (Val.contains('=')) { auto Split = Val.split('='); Opts.PrebuiltModuleFiles.insert( {std::string(Split.first), std::string(Split.second)}); } } for (const auto *A : Args.filtered(OPT_fprebuilt_module_path)) Opts.AddPrebuiltModulePath(A->getValue()); for (const auto *A : Args.filtered(OPT_fmodules_ignore_macro)) { StringRef MacroDef = A->getValue(); Opts.ModulesIgnoreMacros.insert( llvm::CachedHashString(MacroDef.split('=').first)); } // Add -I..., -F..., and -index-header-map options in order. bool IsIndexHeaderMap = false; bool IsSysrootSpecified = Args.hasArg(OPT__sysroot_EQ) || Args.hasArg(OPT_isysroot); for (const auto *A : Args.filtered(OPT_I, OPT_F, OPT_index_header_map)) { if (A->getOption().matches(OPT_index_header_map)) { // -index-header-map applies to the next -I or -F. IsIndexHeaderMap = true; continue; } frontend::IncludeDirGroup Group = IsIndexHeaderMap ? frontend::IndexHeaderMap : frontend::Angled; bool IsFramework = A->getOption().matches(OPT_F); std::string Path = A->getValue(); if (IsSysrootSpecified && !IsFramework && A->getValue()[0] == '=') { SmallString<32> Buffer; llvm::sys::path::append(Buffer, Opts.Sysroot, llvm::StringRef(A->getValue()).substr(1)); Path = std::string(Buffer.str()); } Opts.AddPath(Path, Group, IsFramework, /*IgnoreSysroot*/ true); IsIndexHeaderMap = false; } // Add -iprefix/-iwithprefix/-iwithprefixbefore options. StringRef Prefix = ""; // FIXME: This isn't the correct default prefix. for (const auto *A : Args.filtered(OPT_iprefix, OPT_iwithprefix, OPT_iwithprefixbefore)) { if (A->getOption().matches(OPT_iprefix)) Prefix = A->getValue(); else if (A->getOption().matches(OPT_iwithprefix)) Opts.AddPath(Prefix.str() + A->getValue(), frontend::After, false, true); else Opts.AddPath(Prefix.str() + A->getValue(), frontend::Angled, false, true); } for (const auto *A : Args.filtered(OPT_idirafter)) Opts.AddPath(A->getValue(), frontend::After, false, true); for (const auto *A : Args.filtered(OPT_iquote)) Opts.AddPath(A->getValue(), frontend::Quoted, false, true); for (const auto *A : Args.filtered(OPT_isystem, OPT_iwithsysroot)) Opts.AddPath(A->getValue(), frontend::System, false, !A->getOption().matches(OPT_iwithsysroot)); for (const auto *A : Args.filtered(OPT_iframework)) Opts.AddPath(A->getValue(), frontend::System, true, true); for (const auto *A : Args.filtered(OPT_iframeworkwithsysroot)) Opts.AddPath(A->getValue(), frontend::System, /*IsFramework=*/true, /*IgnoreSysRoot=*/false); // Add the paths for the various language specific isystem flags. for (const auto *A : Args.filtered(OPT_c_isystem)) Opts.AddPath(A->getValue(), frontend::CSystem, false, true); for (const auto *A : Args.filtered(OPT_cxx_isystem)) Opts.AddPath(A->getValue(), frontend::CXXSystem, false, true); for (const auto *A : Args.filtered(OPT_objc_isystem)) Opts.AddPath(A->getValue(), frontend::ObjCSystem, false, true); for (const auto *A : Args.filtered(OPT_objcxx_isystem)) Opts.AddPath(A->getValue(), frontend::ObjCXXSystem, false, true); // Add the internal paths from a driver that detects standard include paths.
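// (Illustrative: the driver typically passes something like "-internal-isystem <resource-dir>/include", which lands in the System group, while -internal-externc-isystem entries go to ExternCSystem; the path is a placeholder.)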
for (const auto *A : Args.filtered(OPT_internal_isystem, OPT_internal_externc_isystem)) { frontend::IncludeDirGroup Group = frontend::System; if (A->getOption().matches(OPT_internal_externc_isystem)) Group = frontend::ExternCSystem; Opts.AddPath(A->getValue(), Group, false, true); } // Add the path prefixes which are implicitly treated as being system headers. for (const auto *A : Args.filtered(OPT_system_header_prefix, OPT_no_system_header_prefix)) Opts.AddSystemHeaderPrefix( A->getValue(), A->getOption().matches(OPT_system_header_prefix)); for (const auto *A : Args.filtered(OPT_ivfsoverlay)) Opts.AddVFSOverlayFile(A->getValue()); return Diags.getNumErrors() == NumErrorsBefore; } void CompilerInvocation::setLangDefaults(LangOptions &Opts, InputKind IK, const llvm::Triple &T, std::vector &Includes, LangStandard::Kind LangStd) { // Set some properties which depend solely on the input kind; it would be nice // to move these to the language standard, and have the driver resolve the // input kind + language standard. // // FIXME: Perhaps a better model would be for a single source file to have // multiple language standards (C / C++ std, ObjC std, OpenCL std, OpenMP std) // simultaneously active? if (IK.getLanguage() == Language::Asm) { Opts.AsmPreprocessor = 1; } else if (IK.isObjectiveC()) { Opts.ObjC = 1; } if (LangStd == LangStandard::lang_unspecified) { // Based on the base language, pick one. switch (IK.getLanguage()) { case Language::Unknown: case Language::LLVM_IR: llvm_unreachable("Invalid input kind!"); case Language::OpenCL: LangStd = LangStandard::lang_opencl12; break; case Language::OpenCLCXX: LangStd = LangStandard::lang_openclcpp10; break; case Language::CUDA: LangStd = LangStandard::lang_cuda; break; case Language::Asm: case Language::C: #if defined(CLANG_DEFAULT_STD_C) LangStd = CLANG_DEFAULT_STD_C; #else // The PS4 uses C99 as the default C standard. if (T.isPS4()) LangStd = LangStandard::lang_gnu99; else LangStd = LangStandard::lang_gnu17; #endif break; case Language::ObjC: #if defined(CLANG_DEFAULT_STD_C) LangStd = CLANG_DEFAULT_STD_C; #else LangStd = LangStandard::lang_gnu11; #endif break; case Language::CXX: case Language::ObjCXX: #if defined(CLANG_DEFAULT_STD_CXX) LangStd = CLANG_DEFAULT_STD_CXX; #else LangStd = LangStandard::lang_gnucxx14; #endif break; case Language::RenderScript: LangStd = LangStandard::lang_c99; break; case Language::HIP: LangStd = LangStandard::lang_hip; break; } } const LangStandard &Std = LangStandard::getLangStandardForKind(LangStd); Opts.LangStd = LangStd; Opts.LineComment = Std.hasLineComments(); Opts.C99 = Std.isC99(); Opts.C11 = Std.isC11(); Opts.C17 = Std.isC17(); Opts.C2x = Std.isC2x(); Opts.CPlusPlus = Std.isCPlusPlus(); Opts.CPlusPlus11 = Std.isCPlusPlus11(); Opts.CPlusPlus14 = Std.isCPlusPlus14(); Opts.CPlusPlus17 = Std.isCPlusPlus17(); Opts.CPlusPlus20 = Std.isCPlusPlus20(); Opts.CPlusPlus2b = Std.isCPlusPlus2b(); Opts.GNUMode = Std.isGNUMode(); Opts.GNUCVersion = 0; Opts.HexFloats = Std.hasHexFloats(); Opts.ImplicitInt = Std.hasImplicitInt(); // Set OpenCL Version. 
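// (Illustrative: "-cl-std=CL2.0" selects LangStandard::lang_opencl20, so OpenCLVersion below becomes 200; likewise "-cl-std=clc++" yields OpenCLCPlusPlusVersion 100.)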
Opts.OpenCL = Std.isOpenCL(); if (LangStd == LangStandard::lang_opencl10) Opts.OpenCLVersion = 100; else if (LangStd == LangStandard::lang_opencl11) Opts.OpenCLVersion = 110; else if (LangStd == LangStandard::lang_opencl12) Opts.OpenCLVersion = 120; else if (LangStd == LangStandard::lang_opencl20) Opts.OpenCLVersion = 200; else if (LangStd == LangStandard::lang_opencl30) Opts.OpenCLVersion = 300; else if (LangStd == LangStandard::lang_openclcpp10) Opts.OpenCLCPlusPlusVersion = 100; else if (LangStd == LangStandard::lang_openclcpp2021) Opts.OpenCLCPlusPlusVersion = 202100; // OpenCL has some additional defaults. if (Opts.OpenCL) { Opts.AltiVec = 0; Opts.ZVector = 0; Opts.setDefaultFPContractMode(LangOptions::FPM_On); Opts.OpenCLCPlusPlus = Opts.CPlusPlus; Opts.OpenCLPipes = Opts.getOpenCLCompatibleVersion() == 200; Opts.OpenCLGenericAddressSpace = Opts.getOpenCLCompatibleVersion() == 200; // Include default header file for OpenCL. if (Opts.IncludeDefaultHeader) { if (Opts.DeclareOpenCLBuiltins) { // Only include base header file for builtin types and constants. Includes.push_back("opencl-c-base.h"); } else { Includes.push_back("opencl-c.h"); } } } Opts.HIP = IK.getLanguage() == Language::HIP; Opts.CUDA = IK.getLanguage() == Language::CUDA || Opts.HIP; if (Opts.HIP) { // HIP toolchain does not support 'Fast' FPOpFusion in backends since it // fuses multiplication/addition instructions without contract flag from // device library functions in LLVM bitcode, which causes accuracy loss in // certain math functions, e.g. tan(-1e20) becomes -0.933 instead of 0.8446. // For device library functions in bitcode to work, 'Strict' or 'Standard' // FPOpFusion options in backends is needed. Therefore 'fast-honor-pragmas' // FP contract option is used to allow fuse across statements in frontend // whereas respecting contract flag in backend. Opts.setDefaultFPContractMode(LangOptions::FPM_FastHonorPragmas); } else if (Opts.CUDA) { // Allow fuse across statements disregarding pragmas. Opts.setDefaultFPContractMode(LangOptions::FPM_Fast); } Opts.RenderScript = IK.getLanguage() == Language::RenderScript; // OpenCL and C++ both have bool, true, false keywords. Opts.Bool = Opts.OpenCL || Opts.CPlusPlus; // OpenCL has half keyword Opts.Half = Opts.OpenCL; } /// Check if input file kind and language standard are compatible. static bool IsInputCompatibleWithStandard(InputKind IK, const LangStandard &S) { switch (IK.getLanguage()) { case Language::Unknown: case Language::LLVM_IR: llvm_unreachable("should not parse language flags for this input"); case Language::C: case Language::ObjC: case Language::RenderScript: return S.getLanguage() == Language::C; case Language::OpenCL: return S.getLanguage() == Language::OpenCL || S.getLanguage() == Language::OpenCLCXX; case Language::OpenCLCXX: return S.getLanguage() == Language::OpenCLCXX; case Language::CXX: case Language::ObjCXX: return S.getLanguage() == Language::CXX; case Language::CUDA: // FIXME: What -std= values should be permitted for CUDA compilations? return S.getLanguage() == Language::CUDA || S.getLanguage() == Language::CXX; case Language::HIP: return S.getLanguage() == Language::CXX || S.getLanguage() == Language::HIP; case Language::Asm: // Accept (and ignore) all -std= values. // FIXME: The -std= value is not ignored; it affects the tokenization // and preprocessing rules if we're preprocessing this asm input. return true; } llvm_unreachable("unexpected input language"); } /// Get language name for given input kind. 
static StringRef GetInputKindName(InputKind IK) { switch (IK.getLanguage()) { case Language::C: return "C"; case Language::ObjC: return "Objective-C"; case Language::CXX: return "C++"; case Language::ObjCXX: return "Objective-C++"; case Language::OpenCL: return "OpenCL"; case Language::OpenCLCXX: return "C++ for OpenCL"; case Language::CUDA: return "CUDA"; case Language::RenderScript: return "RenderScript"; case Language::HIP: return "HIP"; case Language::Asm: return "Asm"; case Language::LLVM_IR: return "LLVM IR"; case Language::Unknown: break; } llvm_unreachable("unknown input language"); } void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, SmallVectorImpl &Args, StringAllocator SA, const llvm::Triple &T, InputKind IK) { if (IK.getFormat() == InputKind::Precompiled || IK.getLanguage() == Language::LLVM_IR) { if (Opts.ObjCAutoRefCount) GenerateArg(Args, OPT_fobjc_arc, SA); if (Opts.PICLevel != 0) GenerateArg(Args, OPT_pic_level, Twine(Opts.PICLevel), SA); if (Opts.PIE) GenerateArg(Args, OPT_pic_is_pie, SA); for (StringRef Sanitizer : serializeSanitizerKinds(Opts.Sanitize)) GenerateArg(Args, OPT_fsanitize_EQ, Sanitizer, SA); return; } OptSpecifier StdOpt; switch (Opts.LangStd) { case LangStandard::lang_opencl10: case LangStandard::lang_opencl11: case LangStandard::lang_opencl12: case LangStandard::lang_opencl20: case LangStandard::lang_opencl30: case LangStandard::lang_openclcpp10: case LangStandard::lang_openclcpp2021: StdOpt = OPT_cl_std_EQ; break; default: StdOpt = OPT_std_EQ; break; } auto LangStandard = LangStandard::getLangStandardForKind(Opts.LangStd); GenerateArg(Args, StdOpt, LangStandard.getName(), SA); if (Opts.IncludeDefaultHeader) GenerateArg(Args, OPT_finclude_default_header, SA); if (Opts.DeclareOpenCLBuiltins) GenerateArg(Args, OPT_fdeclare_opencl_builtins, SA); const LangOptions *LangOpts = &Opts; #define LANG_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING // The '-fcf-protection=' option is generated by CodeGenOpts generator. if (Opts.ObjC) { GenerateArg(Args, OPT_fobjc_runtime_EQ, Opts.ObjCRuntime.getAsString(), SA); if (Opts.GC == LangOptions::GCOnly) GenerateArg(Args, OPT_fobjc_gc_only, SA); else if (Opts.GC == LangOptions::HybridGC) GenerateArg(Args, OPT_fobjc_gc, SA); else if (Opts.ObjCAutoRefCount == 1) GenerateArg(Args, OPT_fobjc_arc, SA); if (Opts.ObjCWeakRuntime) GenerateArg(Args, OPT_fobjc_runtime_has_weak, SA); if (Opts.ObjCWeak) GenerateArg(Args, OPT_fobjc_weak, SA); if (Opts.ObjCSubscriptingLegacyRuntime) GenerateArg(Args, OPT_fobjc_subscripting_legacy_runtime, SA); } if (Opts.GNUCVersion != 0) { unsigned Major = Opts.GNUCVersion / 100 / 100; unsigned Minor = (Opts.GNUCVersion / 100) % 100; unsigned Patch = Opts.GNUCVersion % 100; GenerateArg(Args, OPT_fgnuc_version_EQ, Twine(Major) + "." + Twine(Minor) + "." 
+ Twine(Patch), SA); } if (Opts.IgnoreXCOFFVisibility) GenerateArg(Args, OPT_mignore_xcoff_visibility, SA); if (Opts.SignedOverflowBehavior == LangOptions::SOB_Trapping) { GenerateArg(Args, OPT_ftrapv, SA); GenerateArg(Args, OPT_ftrapv_handler, Opts.OverflowHandler, SA); } else if (Opts.SignedOverflowBehavior == LangOptions::SOB_Defined) { GenerateArg(Args, OPT_fwrapv, SA); } if (Opts.MSCompatibilityVersion != 0) { unsigned Major = Opts.MSCompatibilityVersion / 10000000; unsigned Minor = (Opts.MSCompatibilityVersion / 100000) % 100; unsigned Subminor = Opts.MSCompatibilityVersion % 100000; GenerateArg(Args, OPT_fms_compatibility_version, Twine(Major) + "." + Twine(Minor) + "." + Twine(Subminor), SA); } if ((!Opts.GNUMode && !Opts.MSVCCompat && !Opts.CPlusPlus17) || T.isOSzOS()) { if (!Opts.Trigraphs) GenerateArg(Args, OPT_fno_trigraphs, SA); } else { if (Opts.Trigraphs) GenerateArg(Args, OPT_ftrigraphs, SA); } if (Opts.Blocks && !(Opts.OpenCL && Opts.OpenCLVersion == 200)) GenerateArg(Args, OPT_fblocks, SA); if (Opts.ConvergentFunctions && !(Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) || Opts.SYCLIsDevice)) GenerateArg(Args, OPT_fconvergent_functions, SA); if (Opts.NoBuiltin && !Opts.Freestanding) GenerateArg(Args, OPT_fno_builtin, SA); if (!Opts.NoBuiltin) for (const auto &Func : Opts.NoBuiltinFuncs) GenerateArg(Args, OPT_fno_builtin_, Func, SA); if (Opts.LongDoubleSize == 128) GenerateArg(Args, OPT_mlong_double_128, SA); else if (Opts.LongDoubleSize == 64) GenerateArg(Args, OPT_mlong_double_64, SA); // Not generating '-mrtd', it's just an alias for '-fdefault-calling-conv='. // OpenMP was requested via '-fopenmp', not implied by '-fopenmp-simd' or // '-fopenmp-targets='. if (Opts.OpenMP && !Opts.OpenMPSimd) { GenerateArg(Args, OPT_fopenmp, SA); if (Opts.OpenMP != 50) GenerateArg(Args, OPT_fopenmp_version_EQ, Twine(Opts.OpenMP), SA); if (!Opts.OpenMPUseTLS) GenerateArg(Args, OPT_fnoopenmp_use_tls, SA); if (Opts.OpenMPIsDevice) GenerateArg(Args, OPT_fopenmp_is_device, SA); if (Opts.OpenMPIRBuilder) GenerateArg(Args, OPT_fopenmp_enable_irbuilder, SA); } if (Opts.OpenMPSimd) { GenerateArg(Args, OPT_fopenmp_simd, SA); if (Opts.OpenMP != 50) GenerateArg(Args, OPT_fopenmp_version_EQ, Twine(Opts.OpenMP), SA); } if (Opts.OpenMPTargetNewRuntime) GenerateArg(Args, OPT_fopenmp_target_new_runtime, SA); if (Opts.OpenMPThreadSubscription) GenerateArg(Args, OPT_fopenmp_assume_threads_oversubscription, SA); if (Opts.OpenMPTeamSubscription) GenerateArg(Args, OPT_fopenmp_assume_teams_oversubscription, SA); if (Opts.OpenMPTargetDebug != 0) GenerateArg(Args, OPT_fopenmp_target_debug_EQ, Twine(Opts.OpenMPTargetDebug), SA); if (Opts.OpenMPCUDANumSMs != 0) GenerateArg(Args, OPT_fopenmp_cuda_number_of_sm_EQ, Twine(Opts.OpenMPCUDANumSMs), SA); if (Opts.OpenMPCUDABlocksPerSM != 0) GenerateArg(Args, OPT_fopenmp_cuda_blocks_per_sm_EQ, Twine(Opts.OpenMPCUDABlocksPerSM), SA); if (Opts.OpenMPCUDAReductionBufNum != 1024) GenerateArg(Args, OPT_fopenmp_cuda_teams_reduction_recs_num_EQ, Twine(Opts.OpenMPCUDAReductionBufNum), SA); if (!Opts.OMPTargetTriples.empty()) { std::string Targets; llvm::raw_string_ostream OS(Targets); llvm::interleave( Opts.OMPTargetTriples, OS, [&OS](const llvm::Triple &T) { OS << T.str(); }, ","); GenerateArg(Args, OPT_fopenmp_targets_EQ, OS.str(), SA); } if (!Opts.OMPHostIRFile.empty()) GenerateArg(Args, OPT_fopenmp_host_ir_file_path, Opts.OMPHostIRFile, SA); if (Opts.OpenMPCUDAMode) GenerateArg(Args, OPT_fopenmp_cuda_mode, SA); if (Opts.OpenMPCUDAForceFullRuntime) GenerateArg(Args, 
OPT_fopenmp_cuda_force_full_runtime, SA); // The arguments used to set Optimize, OptimizeSize and NoInlineDefine are // generated from CodeGenOptions. if (Opts.DefaultFPContractMode == LangOptions::FPM_Fast) GenerateArg(Args, OPT_ffp_contract, "fast", SA); else if (Opts.DefaultFPContractMode == LangOptions::FPM_On) GenerateArg(Args, OPT_ffp_contract, "on", SA); else if (Opts.DefaultFPContractMode == LangOptions::FPM_Off) GenerateArg(Args, OPT_ffp_contract, "off", SA); else if (Opts.DefaultFPContractMode == LangOptions::FPM_FastHonorPragmas) GenerateArg(Args, OPT_ffp_contract, "fast-honor-pragmas", SA); for (StringRef Sanitizer : serializeSanitizerKinds(Opts.Sanitize)) GenerateArg(Args, OPT_fsanitize_EQ, Sanitizer, SA); // Conflating '-fsanitize-system-ignorelist' and '-fsanitize-ignorelist'. for (const std::string &F : Opts.NoSanitizeFiles) GenerateArg(Args, OPT_fsanitize_ignorelist_EQ, F, SA); if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver3_8) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "3.8", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver4) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "4.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver6) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "6.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver7) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "7.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver9) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "9.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver11) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "11.0", SA); else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver12) GenerateArg(Args, OPT_fclang_abi_compat_EQ, "12.0", SA); - else if (Opts.getClangABICompat() == LangOptions::ClangABI::Ver13) - GenerateArg(Args, OPT_fclang_abi_compat_EQ, "13.0", SA); if (Opts.getSignReturnAddressScope() == LangOptions::SignReturnAddressScopeKind::All) GenerateArg(Args, OPT_msign_return_address_EQ, "all", SA); else if (Opts.getSignReturnAddressScope() == LangOptions::SignReturnAddressScopeKind::NonLeaf) GenerateArg(Args, OPT_msign_return_address_EQ, "non-leaf", SA); if (Opts.getSignReturnAddressKey() == LangOptions::SignReturnAddressKeyKind::BKey) GenerateArg(Args, OPT_msign_return_address_key_EQ, "b_key", SA); if (Opts.CXXABI) GenerateArg(Args, OPT_fcxx_abi_EQ, TargetCXXABI::getSpelling(*Opts.CXXABI), SA); if (Opts.RelativeCXXABIVTables) GenerateArg(Args, OPT_fexperimental_relative_cxx_abi_vtables, SA); else GenerateArg(Args, OPT_fno_experimental_relative_cxx_abi_vtables, SA); for (const auto &MP : Opts.MacroPrefixMap) GenerateArg(Args, OPT_fmacro_prefix_map_EQ, MP.first + "=" + MP.second, SA); } bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, const llvm::Triple &T, std::vector &Includes, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); if (IK.getFormat() == InputKind::Precompiled || IK.getLanguage() == Language::LLVM_IR) { // ObjCAAutoRefCount and Sanitize LangOpts are used to setup the // PassManager in BackendUtil.cpp. They need to be initialized no matter // what the input type is. if (Args.hasArg(OPT_fobjc_arc)) Opts.ObjCAutoRefCount = 1; // PICLevel and PIELevel are needed during code generation and this should // be set regardless of the input type. 
Opts.PICLevel = getLastArgIntValue(Args, OPT_pic_level, 0, Diags); Opts.PIE = Args.hasArg(OPT_pic_is_pie); parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ), Diags, Opts.Sanitize); return Diags.getNumErrors() == NumErrorsBefore; } // Other LangOpts are only initialized when the input is not AST or LLVM IR. // FIXME: Should we really be parsing this for an Language::Asm input? // FIXME: Cleanup per-file based stuff. LangStandard::Kind LangStd = LangStandard::lang_unspecified; if (const Arg *A = Args.getLastArg(OPT_std_EQ)) { LangStd = LangStandard::getLangKind(A->getValue()); if (LangStd == LangStandard::lang_unspecified) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); // Report supported standards with short description. for (unsigned KindValue = 0; KindValue != LangStandard::lang_unspecified; ++KindValue) { const LangStandard &Std = LangStandard::getLangStandardForKind( static_cast(KindValue)); if (IsInputCompatibleWithStandard(IK, Std)) { auto Diag = Diags.Report(diag::note_drv_use_standard); Diag << Std.getName() << Std.getDescription(); unsigned NumAliases = 0; #define LANGSTANDARD(id, name, lang, desc, features) #define LANGSTANDARD_ALIAS(id, alias) \ if (KindValue == LangStandard::lang_##id) ++NumAliases; #define LANGSTANDARD_ALIAS_DEPR(id, alias) #include "clang/Basic/LangStandards.def" Diag << NumAliases; #define LANGSTANDARD(id, name, lang, desc, features) #define LANGSTANDARD_ALIAS(id, alias) \ if (KindValue == LangStandard::lang_##id) Diag << alias; #define LANGSTANDARD_ALIAS_DEPR(id, alias) #include "clang/Basic/LangStandards.def" } } } else { // Valid standard, check to make sure language and standard are // compatible. const LangStandard &Std = LangStandard::getLangStandardForKind(LangStd); if (!IsInputCompatibleWithStandard(IK, Std)) { Diags.Report(diag::err_drv_argument_not_allowed_with) << A->getAsString(Args) << GetInputKindName(IK); } } } // -cl-std only applies for OpenCL language standards. // Override the -std option in this case. if (const Arg *A = Args.getLastArg(OPT_cl_std_EQ)) { LangStandard::Kind OpenCLLangStd = llvm::StringSwitch(A->getValue()) .Cases("cl", "CL", LangStandard::lang_opencl10) .Cases("cl1.0", "CL1.0", LangStandard::lang_opencl10) .Cases("cl1.1", "CL1.1", LangStandard::lang_opencl11) .Cases("cl1.2", "CL1.2", LangStandard::lang_opencl12) .Cases("cl2.0", "CL2.0", LangStandard::lang_opencl20) .Cases("cl3.0", "CL3.0", LangStandard::lang_opencl30) .Cases("clc++", "CLC++", LangStandard::lang_openclcpp10) .Cases("clc++1.0", "CLC++1.0", LangStandard::lang_openclcpp10) .Cases("clc++2021", "CLC++2021", LangStandard::lang_openclcpp2021) .Default(LangStandard::lang_unspecified); if (OpenCLLangStd == LangStandard::lang_unspecified) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } else LangStd = OpenCLLangStd; } // These need to be parsed now. They are used to set OpenCL defaults. Opts.IncludeDefaultHeader = Args.hasArg(OPT_finclude_default_header); Opts.DeclareOpenCLBuiltins = Args.hasArg(OPT_fdeclare_opencl_builtins); CompilerInvocation::setLangDefaults(Opts, IK, T, Includes, LangStd); // The key paths of codegen options defined in Options.td start with // "LangOpts->". Let's provide the expected variable name and type. 
LangOptions *LangOpts = &Opts; #define LANG_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) { StringRef Name = A->getValue(); if (Name == "full" || Name == "branch") { Opts.CFProtectionBranch = 1; } } if ((Args.hasArg(OPT_fsycl_is_device) || Args.hasArg(OPT_fsycl_is_host)) && !Args.hasArg(OPT_sycl_std_EQ)) { // If the user supplied -fsycl-is-device or -fsycl-is-host, but failed to // provide -sycl-std=, we want to default it to whatever the default SYCL // version is. I could not find a way to express this with the options // tablegen because we still want this value to be SYCL_None when the user // is not in device or host mode. Opts.setSYCLVersion(LangOptions::SYCL_Default); } if (Opts.ObjC) { if (Arg *arg = Args.getLastArg(OPT_fobjc_runtime_EQ)) { StringRef value = arg->getValue(); if (Opts.ObjCRuntime.tryParse(value)) Diags.Report(diag::err_drv_unknown_objc_runtime) << value; } if (Args.hasArg(OPT_fobjc_gc_only)) Opts.setGC(LangOptions::GCOnly); else if (Args.hasArg(OPT_fobjc_gc)) Opts.setGC(LangOptions::HybridGC); else if (Args.hasArg(OPT_fobjc_arc)) { Opts.ObjCAutoRefCount = 1; if (!Opts.ObjCRuntime.allowsARC()) Diags.Report(diag::err_arc_unsupported_on_runtime); } // ObjCWeakRuntime tracks whether the runtime supports __weak, not // whether the feature is actually enabled. This is predominantly // determined by -fobjc-runtime, but we allow it to be overridden // from the command line for testing purposes. if (Args.hasArg(OPT_fobjc_runtime_has_weak)) Opts.ObjCWeakRuntime = 1; else Opts.ObjCWeakRuntime = Opts.ObjCRuntime.allowsWeak(); // ObjCWeak determines whether __weak is actually enabled. // Note that we allow -fno-objc-weak to disable this even in ARC mode. if (auto weakArg = Args.getLastArg(OPT_fobjc_weak, OPT_fno_objc_weak)) { if (!weakArg->getOption().matches(OPT_fobjc_weak)) { assert(!Opts.ObjCWeak); } else if (Opts.getGC() != LangOptions::NonGC) { Diags.Report(diag::err_objc_weak_with_gc); } else if (!Opts.ObjCWeakRuntime) { Diags.Report(diag::err_objc_weak_unsupported); } else { Opts.ObjCWeak = 1; } } else if (Opts.ObjCAutoRefCount) { Opts.ObjCWeak = Opts.ObjCWeakRuntime; } if (Args.hasArg(OPT_fobjc_subscripting_legacy_runtime)) Opts.ObjCSubscriptingLegacyRuntime = (Opts.ObjCRuntime.getKind() == ObjCRuntime::FragileMacOSX); } if (Arg *A = Args.getLastArg(options::OPT_fgnuc_version_EQ)) { // Check that the version has 1 to 3 components and the minor and patch // versions fit in two decimal digits. VersionTuple GNUCVer; bool Invalid = GNUCVer.tryParse(A->getValue()); unsigned Major = GNUCVer.getMajor(); unsigned Minor = GNUCVer.getMinor().getValueOr(0); unsigned Patch = GNUCVer.getSubminor().getValueOr(0); if (Invalid || GNUCVer.getBuild() || Minor >= 100 || Patch >= 100) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } Opts.GNUCVersion = Major * 100 * 100 + Minor * 100 + Patch; } // In AIX OS, the -mignore-xcoff-visibility is enabled by default if there is // no -fvisibility=* option.
// This is the reason why '-fvisibility' needs to be always generated: // its absence implies '-mignore-xcoff-visibility'. // // Suppose the original cc1 command line does contain '-fvisibility default': // '-mignore-xcoff-visibility' should not be implied. // * If '-fvisibility' is not generated (as most options with default values // don't), its absence would imply '-mignore-xcoff-visibility'. This changes // the command line semantics. // * If '-fvisibility' is generated regardless of its presence and value, // '-mignore-xcoff-visibility' won't be implied and the command line // semantics are kept intact. // // When the original cc1 command line does **not** contain '-fvisibility', // '-mignore-xcoff-visibility' is implied. The generated command line will // contain both '-fvisibility default' and '-mignore-xcoff-visibility' and // subsequent calls to `CreateFromArgs`/`generateCC1CommandLine` will always // produce the same arguments. if (T.isOSAIX() && (Args.hasArg(OPT_mignore_xcoff_visibility) || !Args.hasArg(OPT_fvisibility))) Opts.IgnoreXCOFFVisibility = 1; if (Args.hasArg(OPT_ftrapv)) { Opts.setSignedOverflowBehavior(LangOptions::SOB_Trapping); // Set the handler, if one is specified. Opts.OverflowHandler = std::string(Args.getLastArgValue(OPT_ftrapv_handler)); } else if (Args.hasArg(OPT_fwrapv)) Opts.setSignedOverflowBehavior(LangOptions::SOB_Defined); Opts.MSCompatibilityVersion = 0; if (const Arg *A = Args.getLastArg(OPT_fms_compatibility_version)) { VersionTuple VT; if (VT.tryParse(A->getValue())) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); Opts.MSCompatibilityVersion = VT.getMajor() * 10000000 + VT.getMinor().getValueOr(0) * 100000 + VT.getSubminor().getValueOr(0); } // Mimicking gcc's behavior, trigraphs are only enabled if -trigraphs // is specified, or -std is set to a conforming mode. // Trigraphs are disabled by default in c++1z onwards. // For z/OS, trigraphs are enabled by default (without regard to the above). Opts.Trigraphs = (!Opts.GNUMode && !Opts.MSVCCompat && !Opts.CPlusPlus17) || T.isOSzOS(); Opts.Trigraphs = Args.hasFlag(OPT_ftrigraphs, OPT_fno_trigraphs, Opts.Trigraphs); Opts.Blocks = Args.hasArg(OPT_fblocks) || (Opts.OpenCL && Opts.OpenCLVersion == 200); Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) || Opts.SYCLIsDevice || Args.hasArg(OPT_fconvergent_functions); Opts.NoBuiltin = Args.hasArg(OPT_fno_builtin) || Opts.Freestanding; if (!Opts.NoBuiltin) getAllNoBuiltinFuncValues(Args, Opts.NoBuiltinFuncs); Opts.LongDoubleSize = Args.hasArg(OPT_mlong_double_128) ? 128 : Args.hasArg(OPT_mlong_double_64) ? 64 : 0; if (Opts.FastRelaxedMath) Opts.setDefaultFPContractMode(LangOptions::FPM_Fast); llvm::sort(Opts.ModuleFeatures); // -mrtd option if (Arg *A = Args.getLastArg(OPT_mrtd)) { if (Opts.getDefaultCallingConv() != LangOptions::DCC_None) Diags.Report(diag::err_drv_argument_not_allowed_with) << A->getSpelling() << "-fdefault-calling-conv"; else { if (T.getArch() != llvm::Triple::x86) Diags.Report(diag::err_drv_argument_not_allowed_with) << A->getSpelling() << T.getTriple(); else Opts.setDefaultCallingConv(LangOptions::DCC_StdCall); } } // Check if -fopenmp is specified and set default version to 5.0. Opts.OpenMP = Args.hasArg(OPT_fopenmp) ? 50 : 0; // Check if -fopenmp-simd is specified. 
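// (Illustrative: -fopenmp-simd alone sets OpenMPSimd below without enabling full OpenMP; Opts.OpenMP itself stores ten times the standard version, e.g. 50 for the 5.0 default above or 45 after -fopenmp-version=45.)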
bool IsSimdSpecified = Args.hasFlag(options::OPT_fopenmp_simd, options::OPT_fno_openmp_simd, /*Default=*/false); Opts.OpenMPSimd = !Opts.OpenMP && IsSimdSpecified; Opts.OpenMPUseTLS = Opts.OpenMP && !Args.hasArg(options::OPT_fnoopenmp_use_tls); Opts.OpenMPIsDevice = Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_is_device); Opts.OpenMPIRBuilder = Opts.OpenMP && Args.hasArg(options::OPT_fopenmp_enable_irbuilder); bool IsTargetSpecified = Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ); Opts.OpenMPTargetNewRuntime = Opts.OpenMPIsDevice && Args.hasArg(options::OPT_fopenmp_target_new_runtime); Opts.ConvergentFunctions = Opts.ConvergentFunctions || Opts.OpenMPIsDevice; if (Opts.OpenMP || Opts.OpenMPSimd) { if (int Version = getLastArgIntValue( Args, OPT_fopenmp_version_EQ, (IsSimdSpecified || IsTargetSpecified) ? 50 : Opts.OpenMP, Diags)) Opts.OpenMP = Version; // Provide diagnostic when a given target is not expected to be an OpenMP // device or host. if (!Opts.OpenMPIsDevice) { switch (T.getArch()) { default: break; // Add unsupported host targets here: case llvm::Triple::nvptx: case llvm::Triple::nvptx64: Diags.Report(diag::err_drv_omp_host_target_not_supported) << T.str(); break; } } } // Set the flag to prevent the implementation from emitting device exception // handling code for those requiring so. if ((Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN())) || Opts.OpenCLCPlusPlus) { Opts.Exceptions = 0; Opts.CXXExceptions = 0; } if (Opts.OpenMPIsDevice && T.isNVPTX()) { Opts.OpenMPCUDANumSMs = getLastArgIntValue(Args, options::OPT_fopenmp_cuda_number_of_sm_EQ, Opts.OpenMPCUDANumSMs, Diags); Opts.OpenMPCUDABlocksPerSM = getLastArgIntValue(Args, options::OPT_fopenmp_cuda_blocks_per_sm_EQ, Opts.OpenMPCUDABlocksPerSM, Diags); Opts.OpenMPCUDAReductionBufNum = getLastArgIntValue( Args, options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ, Opts.OpenMPCUDAReductionBufNum, Diags); } // Set the value of the debugging flag used in the new offloading device RTL. // Set either by a specific value or to a default if not specified. if (Opts.OpenMPIsDevice && (Args.hasArg(OPT_fopenmp_target_debug) || Args.hasArg(OPT_fopenmp_target_debug_EQ))) { if (Opts.OpenMPTargetNewRuntime) { Opts.OpenMPTargetDebug = getLastArgIntValue( Args, OPT_fopenmp_target_debug_EQ, Opts.OpenMPTargetDebug, Diags); if (!Opts.OpenMPTargetDebug && Args.hasArg(OPT_fopenmp_target_debug)) Opts.OpenMPTargetDebug = 1; } else { Diags.Report(diag::err_drv_debug_no_new_runtime); } } if (Opts.OpenMPIsDevice && Opts.OpenMPTargetNewRuntime) { if (Args.hasArg(OPT_fopenmp_assume_teams_oversubscription)) Opts.OpenMPTeamSubscription = true; if (Args.hasArg(OPT_fopenmp_assume_threads_oversubscription)) Opts.OpenMPThreadSubscription = true; } // Get the OpenMP target triples if any. 
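// Illustrative examples for the device-triple validation below (sketch
// derived from the checks, not part of this patch): the device triple must
// name a supported offload arch and match the host's pointer width.
//   host x86_64-unknown-linux-gnu, -fopenmp-targets=nvptx64-nvidia-cuda
//     -> accepted (both 64-bit)
//   host x86_64-unknown-linux-gnu, -fopenmp-targets=nvptx-nvidia-cuda
//     -> err_drv_incompatible_omp_arch (32-bit device vs. 64-bit host)
//   host x86_64-unknown-linux-gnu, -fopenmp-targets=mips64-unknown-linux-gnu
//     -> err_drv_invalid_omp_target (arch not in the supported list)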
if (Arg *A = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) { enum ArchPtrSize { Arch16Bit, Arch32Bit, Arch64Bit }; auto getArchPtrSize = [](const llvm::Triple &T) { if (T.isArch16Bit()) return Arch16Bit; if (T.isArch32Bit()) return Arch32Bit; assert(T.isArch64Bit() && "Expected 64-bit architecture"); return Arch64Bit; }; for (unsigned i = 0; i < A->getNumValues(); ++i) { llvm::Triple TT(A->getValue(i)); if (TT.getArch() == llvm::Triple::UnknownArch || !(TT.getArch() == llvm::Triple::aarch64 || TT.isPPC() || TT.getArch() == llvm::Triple::nvptx || TT.getArch() == llvm::Triple::nvptx64 || TT.getArch() == llvm::Triple::amdgcn || TT.getArch() == llvm::Triple::x86 || TT.getArch() == llvm::Triple::x86_64)) Diags.Report(diag::err_drv_invalid_omp_target) << A->getValue(i); else if (getArchPtrSize(T) != getArchPtrSize(TT)) Diags.Report(diag::err_drv_incompatible_omp_arch) << A->getValue(i) << T.str(); else Opts.OMPTargetTriples.push_back(TT); } } // Get OpenMP host file path if any and report if a non existent file is // found if (Arg *A = Args.getLastArg(options::OPT_fopenmp_host_ir_file_path)) { Opts.OMPHostIRFile = A->getValue(); if (!llvm::sys::fs::exists(Opts.OMPHostIRFile)) Diags.Report(diag::err_drv_omp_host_ir_file_not_found) << Opts.OMPHostIRFile; } // Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN()) && Args.hasArg(options::OPT_fopenmp_cuda_mode); // Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options Opts.OpenMPCUDAForceFullRuntime = Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN()) && Args.hasArg(options::OPT_fopenmp_cuda_force_full_runtime); // FIXME: Eliminate this dependency. unsigned Opt = getOptimizationLevel(Args, IK, Diags), OptSize = getOptimizationLevelSize(Args); Opts.Optimize = Opt != 0; Opts.OptimizeSize = OptSize != 0; // This is the __NO_INLINE__ define, which just depends on things like the // optimization level and -fno-inline, not actually whether the backend has // inlining enabled. Opts.NoInlineDefine = !Opts.Optimize; if (Arg *InlineArg = Args.getLastArg( options::OPT_finline_functions, options::OPT_finline_hint_functions, options::OPT_fno_inline_functions, options::OPT_fno_inline)) if (InlineArg->getOption().matches(options::OPT_fno_inline)) Opts.NoInlineDefine = true; if (Arg *A = Args.getLastArg(OPT_ffp_contract)) { StringRef Val = A->getValue(); if (Val == "fast") Opts.setDefaultFPContractMode(LangOptions::FPM_Fast); else if (Val == "on") Opts.setDefaultFPContractMode(LangOptions::FPM_On); else if (Val == "off") Opts.setDefaultFPContractMode(LangOptions::FPM_Off); else if (Val == "fast-honor-pragmas") Opts.setDefaultFPContractMode(LangOptions::FPM_FastHonorPragmas); else Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } // Parse -fsanitize= arguments. 
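// Illustrative sketch of the sanitizer plumbing below (not part of this
// patch): -fsanitize= values accumulate into Opts.Sanitize, and user-supplied
// ignorelists are recorded before the system-provided ones.
//   -fsanitize=address -fsanitize-ignorelist=my.txt
//     -> Opts.Sanitize contains Address;
//        Opts.NoSanitizeFiles == {"my.txt", <system ignorelists...>}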
parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ), Diags, Opts.Sanitize); Opts.NoSanitizeFiles = Args.getAllArgValues(OPT_fsanitize_ignorelist_EQ); std::vector<std::string> systemIgnorelists = Args.getAllArgValues(OPT_fsanitize_system_ignorelist_EQ); Opts.NoSanitizeFiles.insert(Opts.NoSanitizeFiles.end(), systemIgnorelists.begin(), systemIgnorelists.end()); if (Arg *A = Args.getLastArg(OPT_fclang_abi_compat_EQ)) { Opts.setClangABICompat(LangOptions::ClangABI::Latest); StringRef Ver = A->getValue(); std::pair<StringRef, StringRef> VerParts = Ver.split('.'); unsigned Major, Minor = 0; // Check the version number is valid: either 3.x (0 <= x <= 9) or // y or y.0 (4 <= y <= current version). if (!VerParts.first.startswith("0") && !VerParts.first.getAsInteger(10, Major) && 3 <= Major && Major <= CLANG_VERSION_MAJOR && (Major == 3 ? VerParts.second.size() == 1 && !VerParts.second.getAsInteger(10, Minor) : VerParts.first.size() == Ver.size() || VerParts.second == "0")) { // Got a valid version number. if (Major == 3 && Minor <= 8) Opts.setClangABICompat(LangOptions::ClangABI::Ver3_8); else if (Major <= 4) Opts.setClangABICompat(LangOptions::ClangABI::Ver4); else if (Major <= 6) Opts.setClangABICompat(LangOptions::ClangABI::Ver6); else if (Major <= 7) Opts.setClangABICompat(LangOptions::ClangABI::Ver7); else if (Major <= 9) Opts.setClangABICompat(LangOptions::ClangABI::Ver9); else if (Major <= 11) Opts.setClangABICompat(LangOptions::ClangABI::Ver11); else if (Major <= 12) Opts.setClangABICompat(LangOptions::ClangABI::Ver12); - else if (Major <= 13) - Opts.setClangABICompat(LangOptions::ClangABI::Ver13); } else if (Ver != "latest") { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } } if (Arg *A = Args.getLastArg(OPT_msign_return_address_EQ)) { StringRef SignScope = A->getValue(); if (SignScope.equals_insensitive("none")) Opts.setSignReturnAddressScope( LangOptions::SignReturnAddressScopeKind::None); else if (SignScope.equals_insensitive("all")) Opts.setSignReturnAddressScope( LangOptions::SignReturnAddressScopeKind::All); else if (SignScope.equals_insensitive("non-leaf")) Opts.setSignReturnAddressScope( LangOptions::SignReturnAddressScopeKind::NonLeaf); else Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << SignScope; if (Arg *A = Args.getLastArg(OPT_msign_return_address_key_EQ)) { StringRef SignKey = A->getValue(); if (!SignScope.empty() && !SignKey.empty()) { if (SignKey.equals_insensitive("a_key")) Opts.setSignReturnAddressKey( LangOptions::SignReturnAddressKeyKind::AKey); else if (SignKey.equals_insensitive("b_key")) Opts.setSignReturnAddressKey( LangOptions::SignReturnAddressKeyKind::BKey); else Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << SignKey; } } } // The value can be empty, which indicates the system default should be used.
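// Note on the -fc++-abi= handling below (sketch, not part of this patch): an
// explicitly requested ABI must both name a known ABI kind
// (TargetCXXABI::isABI) and be supported for the current target triple,
// otherwise err_invalid_cxx_abi / err_unsupported_cxx_abi fire; an empty
// value falls through to the target's default C++ ABI.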
StringRef CXXABI = Args.getLastArgValue(OPT_fcxx_abi_EQ); if (!CXXABI.empty()) { if (!TargetCXXABI::isABI(CXXABI)) { Diags.Report(diag::err_invalid_cxx_abi) << CXXABI; } else { auto Kind = TargetCXXABI::getKind(CXXABI); if (!TargetCXXABI::isSupportedCXXABI(T, Kind)) Diags.Report(diag::err_unsupported_cxx_abi) << CXXABI << T.str(); else Opts.CXXABI = Kind; } } Opts.RelativeCXXABIVTables = Args.hasFlag(options::OPT_fexperimental_relative_cxx_abi_vtables, options::OPT_fno_experimental_relative_cxx_abi_vtables, TargetCXXABI::usesRelativeVTables(T)); for (const auto &A : Args.getAllArgValues(OPT_fmacro_prefix_map_EQ)) { auto Split = StringRef(A).split('='); Opts.MacroPrefixMap.insert( {std::string(Split.first), std::string(Split.second)}); } // Error if -mvscale-min is unbounded. if (Arg *A = Args.getLastArg(options::OPT_mvscale_min_EQ)) { unsigned VScaleMin; if (StringRef(A->getValue()).getAsInteger(10, VScaleMin) || VScaleMin == 0) Diags.Report(diag::err_cc1_unbounded_vscale_min); } return Diags.getNumErrors() == NumErrorsBefore; } static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) { switch (Action) { case frontend::ASTDeclList: case frontend::ASTDump: case frontend::ASTPrint: case frontend::ASTView: case frontend::EmitAssembly: case frontend::EmitBC: case frontend::EmitHTML: case frontend::EmitLLVM: case frontend::EmitLLVMOnly: case frontend::EmitCodeGenOnly: case frontend::EmitObj: case frontend::ExtractAPI: case frontend::FixIt: case frontend::GenerateModule: case frontend::GenerateModuleInterface: case frontend::GenerateHeaderModule: case frontend::GeneratePCH: case frontend::GenerateInterfaceStubs: case frontend::ParseSyntaxOnly: case frontend::ModuleFileInfo: case frontend::VerifyPCH: case frontend::PluginAction: case frontend::RewriteObjC: case frontend::RewriteTest: case frontend::RunAnalysis: case frontend::TemplightDump: case frontend::MigrateSource: return false; case frontend::DumpCompilerOptions: case frontend::DumpRawTokens: case frontend::DumpTokens: case frontend::InitOnly: case frontend::PrintPreamble: case frontend::PrintPreprocessedInput: case frontend::RewriteMacros: case frontend::RunPreprocessorOnly: case frontend::PrintDependencyDirectivesSourceMinimizerOutput: return true; } llvm_unreachable("invalid frontend action"); } static void GeneratePreprocessorArgs(PreprocessorOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA, const LangOptions &LangOpts, const FrontendOptions &FrontendOpts, const CodeGenOptions &CodeGenOpts) { PreprocessorOptions *PreprocessorOpts = &Opts; #define PREPROCESSOR_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef PREPROCESSOR_OPTION_WITH_MARSHALLING if (Opts.PCHWithHdrStop && !Opts.PCHWithHdrStopCreate) GenerateArg(Args, OPT_pch_through_hdrstop_use, SA); for (const auto &D : Opts.DeserializedPCHDeclsToErrorOn) GenerateArg(Args, OPT_error_on_deserialized_pch_decl, D, SA); if (Opts.PrecompiledPreambleBytes != std::make_pair(0u, false)) GenerateArg(Args, OPT_preamble_bytes_EQ, Twine(Opts.PrecompiledPreambleBytes.first) + "," + (Opts.PrecompiledPreambleBytes.second 
? "1" : "0"), SA); for (const auto &M : Opts.Macros) { // Don't generate __CET__ macro definitions. They are implied by the // -fcf-protection option that is generated elsewhere. if (M.first == "__CET__=1" && !M.second && !CodeGenOpts.CFProtectionReturn && CodeGenOpts.CFProtectionBranch) continue; if (M.first == "__CET__=2" && !M.second && CodeGenOpts.CFProtectionReturn && !CodeGenOpts.CFProtectionBranch) continue; if (M.first == "__CET__=3" && !M.second && CodeGenOpts.CFProtectionReturn && CodeGenOpts.CFProtectionBranch) continue; GenerateArg(Args, M.second ? OPT_U : OPT_D, M.first, SA); } for (const auto &I : Opts.Includes) { // Don't generate OpenCL includes. They are implied by other flags that are // generated elsewhere. if (LangOpts.OpenCL && LangOpts.IncludeDefaultHeader && ((LangOpts.DeclareOpenCLBuiltins && I == "opencl-c-base.h") || I == "opencl-c.h")) continue; GenerateArg(Args, OPT_include, I, SA); } for (const auto &CI : Opts.ChainedIncludes) GenerateArg(Args, OPT_chain_include, CI, SA); for (const auto &RF : Opts.RemappedFiles) GenerateArg(Args, OPT_remap_file, RF.first + ";" + RF.second, SA); // Don't handle LexEditorPlaceholders. It is implied by the action that is // generated elsewhere. } static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags, frontend::ActionKind Action, const FrontendOptions &FrontendOpts) { unsigned NumErrorsBefore = Diags.getNumErrors(); PreprocessorOptions *PreprocessorOpts = &Opts; #define PREPROCESSOR_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef PREPROCESSOR_OPTION_WITH_MARSHALLING Opts.PCHWithHdrStop = Args.hasArg(OPT_pch_through_hdrstop_create) || Args.hasArg(OPT_pch_through_hdrstop_use); for (const auto *A : Args.filtered(OPT_error_on_deserialized_pch_decl)) Opts.DeserializedPCHDeclsToErrorOn.insert(A->getValue()); if (const Arg *A = Args.getLastArg(OPT_preamble_bytes_EQ)) { StringRef Value(A->getValue()); size_t Comma = Value.find(','); unsigned Bytes = 0; unsigned EndOfLine = 0; if (Comma == StringRef::npos || Value.substr(0, Comma).getAsInteger(10, Bytes) || Value.substr(Comma + 1).getAsInteger(10, EndOfLine)) Diags.Report(diag::err_drv_preamble_format); else { Opts.PrecompiledPreambleBytes.first = Bytes; Opts.PrecompiledPreambleBytes.second = (EndOfLine != 0); } } // Add the __CET__ macro if a CFProtection option is set. if (const Arg *A = Args.getLastArg(OPT_fcf_protection_EQ)) { StringRef Name = A->getValue(); if (Name == "branch") Opts.addMacroDef("__CET__=1"); else if (Name == "return") Opts.addMacroDef("__CET__=2"); else if (Name == "full") Opts.addMacroDef("__CET__=3"); } // Add macros from the command line. for (const auto *A : Args.filtered(OPT_D, OPT_U)) { if (A->getOption().matches(OPT_D)) Opts.addMacroDef(A->getValue()); else Opts.addMacroUndef(A->getValue()); } // Add the ordered list of -includes. 
for (const auto *A : Args.filtered(OPT_include)) Opts.Includes.emplace_back(A->getValue()); for (const auto *A : Args.filtered(OPT_chain_include)) Opts.ChainedIncludes.emplace_back(A->getValue()); for (const auto *A : Args.filtered(OPT_remap_file)) { std::pair Split = StringRef(A->getValue()).split(';'); if (Split.second.empty()) { Diags.Report(diag::err_drv_invalid_remap_file) << A->getAsString(Args); continue; } Opts.addRemappedFile(Split.first, Split.second); } // Always avoid lexing editor placeholders when we're just running the // preprocessor as we never want to emit the // "editor placeholder in source file" error in PP only mode. if (isStrictlyPreprocessorAction(Action)) Opts.LexEditorPlaceholders = false; return Diags.getNumErrors() == NumErrorsBefore; } static void GeneratePreprocessorOutputArgs( const PreprocessorOutputOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA, frontend::ActionKind Action) { const PreprocessorOutputOptions &PreprocessorOutputOpts = Opts; #define PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING bool Generate_dM = isStrictlyPreprocessorAction(Action) && !Opts.ShowCPP; if (Generate_dM) GenerateArg(Args, OPT_dM, SA); if (!Generate_dM && Opts.ShowMacros) GenerateArg(Args, OPT_dD, SA); } static bool ParsePreprocessorOutputArgs(PreprocessorOutputOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags, frontend::ActionKind Action) { unsigned NumErrorsBefore = Diags.getNumErrors(); PreprocessorOutputOptions &PreprocessorOutputOpts = Opts; #define PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef PREPROCESSOR_OUTPUT_OPTION_WITH_MARSHALLING Opts.ShowCPP = isStrictlyPreprocessorAction(Action) && !Args.hasArg(OPT_dM); Opts.ShowMacros = Args.hasArg(OPT_dM) || Args.hasArg(OPT_dD); return Diags.getNumErrors() == NumErrorsBefore; } static void GenerateTargetArgs(const TargetOptions &Opts, SmallVectorImpl &Args, CompilerInvocation::StringAllocator SA) { const TargetOptions *TargetOpts = &Opts; #define TARGET_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef TARGET_OPTION_WITH_MARSHALLING if (!Opts.SDKVersion.empty()) GenerateArg(Args, 
OPT_target_sdk_version_EQ, Opts.SDKVersion.getAsString(), SA); } static bool ParseTargetArgs(TargetOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); TargetOptions *TargetOpts = &Opts; #define TARGET_OPTION_WITH_MARSHALLING( \ PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ MERGER, EXTRACTOR, TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING( \ Args, Diags, ID, FLAGS, PARAM, SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, MERGER, TABLE_INDEX) #include "clang/Driver/Options.inc" #undef TARGET_OPTION_WITH_MARSHALLING if (Arg *A = Args.getLastArg(options::OPT_target_sdk_version_EQ)) { llvm::VersionTuple Version; if (Version.tryParse(A->getValue())) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); else Opts.SDKVersion = Version; } return Diags.getNumErrors() == NumErrorsBefore; } bool CompilerInvocation::CreateFromArgsImpl( CompilerInvocation &Res, ArrayRef CommandLineArgs, DiagnosticsEngine &Diags, const char *Argv0) { unsigned NumErrorsBefore = Diags.getNumErrors(); // Parse the arguments. const OptTable &Opts = getDriverOptTable(); const unsigned IncludedFlagsBitmask = options::CC1Option; unsigned MissingArgIndex, MissingArgCount; InputArgList Args = Opts.ParseArgs(CommandLineArgs, MissingArgIndex, MissingArgCount, IncludedFlagsBitmask); LangOptions &LangOpts = *Res.getLangOpts(); // Check for missing argument error. if (MissingArgCount) Diags.Report(diag::err_drv_missing_argument) << Args.getArgString(MissingArgIndex) << MissingArgCount; // Issue errors on unknown arguments. for (const auto *A : Args.filtered(OPT_UNKNOWN)) { auto ArgString = A->getAsString(Args); std::string Nearest; if (Opts.findNearest(ArgString, Nearest, IncludedFlagsBitmask) > 1) Diags.Report(diag::err_drv_unknown_argument) << ArgString; else Diags.Report(diag::err_drv_unknown_argument_with_suggestion) << ArgString << Nearest; } ParseFileSystemArgs(Res.getFileSystemOpts(), Args, Diags); ParseMigratorArgs(Res.getMigratorOpts(), Args, Diags); ParseAnalyzerArgs(*Res.getAnalyzerOpts(), Args, Diags); ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags, /*DefaultDiagColor=*/false); ParseFrontendArgs(Res.getFrontendOpts(), Args, Diags, LangOpts.IsHeaderFile); // FIXME: We shouldn't have to pass the DashX option around here InputKind DashX = Res.getFrontendOpts().DashX; ParseTargetArgs(Res.getTargetOpts(), Args, Diags); llvm::Triple T(Res.getTargetOpts().Triple); ParseHeaderSearchArgs(Res.getHeaderSearchOpts(), Args, Diags, Res.getFileSystemOpts().WorkingDir); ParseLangArgs(LangOpts, Args, DashX, T, Res.getPreprocessorOpts().Includes, Diags); if (Res.getFrontendOpts().ProgramAction == frontend::RewriteObjC) LangOpts.ObjCExceptions = 1; if (LangOpts.CUDA) { // During CUDA device-side compilation, the aux triple is the // triple used for host compilation. if (LangOpts.CUDAIsDevice) Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple; } // Set the triple of the host for OpenMP device compile. 
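// Illustrative cc1 invocation for the device-side branch below (sketch, not
// part of this patch; the CUDA device path above does the same):
//   -cc1 -triple nvptx64-nvidia-cuda -aux-triple x86_64-unknown-linux-gnu
//        -fopenmp -fopenmp-is-device ...
// The aux triple names the host, so HostTriple is taken from it.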
if (LangOpts.OpenMPIsDevice) Res.getTargetOpts().HostTriple = Res.getFrontendOpts().AuxTriple; ParseCodeGenArgs(Res.getCodeGenOpts(), Args, DashX, Diags, T, Res.getFrontendOpts().OutputFile, LangOpts); // FIXME: Override value name discarding when asan or msan is used because the // backend passes depend on the name of the alloca in order to print out // names. Res.getCodeGenOpts().DiscardValueNames &= !LangOpts.Sanitize.has(SanitizerKind::Address) && !LangOpts.Sanitize.has(SanitizerKind::KernelAddress) && !LangOpts.Sanitize.has(SanitizerKind::Memory) && !LangOpts.Sanitize.has(SanitizerKind::KernelMemory); ParsePreprocessorArgs(Res.getPreprocessorOpts(), Args, Diags, Res.getFrontendOpts().ProgramAction, Res.getFrontendOpts()); ParsePreprocessorOutputArgs(Res.getPreprocessorOutputOpts(), Args, Diags, Res.getFrontendOpts().ProgramAction); ParseDependencyOutputArgs(Res.getDependencyOutputOpts(), Args, Diags, Res.getFrontendOpts().ProgramAction, Res.getPreprocessorOutputOpts().ShowLineMarkers); if (!Res.getDependencyOutputOpts().OutputFile.empty() && Res.getDependencyOutputOpts().Targets.empty()) Diags.Report(diag::err_fe_dependency_file_requires_MT); // If sanitizer is enabled, disable OPT_ffine_grained_bitfield_accesses. if (Res.getCodeGenOpts().FineGrainedBitfieldAccesses && !Res.getLangOpts()->Sanitize.empty()) { Res.getCodeGenOpts().FineGrainedBitfieldAccesses = false; Diags.Report(diag::warn_drv_fine_grained_bitfield_accesses_ignored); } // Store the command-line for using in the CodeView backend. Res.getCodeGenOpts().Argv0 = Argv0; append_range(Res.getCodeGenOpts().CommandLineArgs, CommandLineArgs); FixupInvocation(Res, Diags, Args, DashX); return Diags.getNumErrors() == NumErrorsBefore; } bool CompilerInvocation::CreateFromArgs(CompilerInvocation &Invocation, ArrayRef CommandLineArgs, DiagnosticsEngine &Diags, const char *Argv0) { CompilerInvocation DummyInvocation; return RoundTrip( [](CompilerInvocation &Invocation, ArrayRef CommandLineArgs, DiagnosticsEngine &Diags, const char *Argv0) { return CreateFromArgsImpl(Invocation, CommandLineArgs, Diags, Argv0); }, [](CompilerInvocation &Invocation, SmallVectorImpl &Args, StringAllocator SA) { Invocation.generateCC1CommandLine(Args, SA); }, Invocation, DummyInvocation, CommandLineArgs, Diags, Argv0); } std::string CompilerInvocation::getModuleHash() const { // FIXME: Consider using SHA1 instead of MD5. llvm::HashBuilder HBuilder; // Note: For QoI reasons, the things we use as a hash here should all be // dumped via the -module-info flag. // Start the signature with the compiler version. HBuilder.add(getClangFullRepositoryVersion()); // Also include the serialization version, in case LLVM_APPEND_VC_REV is off // and getClangFullRepositoryVersion() doesn't include git revision. HBuilder.add(serialization::VERSION_MAJOR, serialization::VERSION_MINOR); // Extend the signature with the language options #define LANGOPT(Name, Bits, Default, Description) HBuilder.add(LangOpts->Name); #define ENUM_LANGOPT(Name, Type, Bits, Default, Description) \ HBuilder.add(static_cast(LangOpts->get##Name())); #define BENIGN_LANGOPT(Name, Bits, Default, Description) #define BENIGN_ENUM_LANGOPT(Name, Type, Bits, Default, Description) #include "clang/Basic/LangOptions.def" HBuilder.addRange(LangOpts->ModuleFeatures); HBuilder.add(LangOpts->ObjCRuntime); HBuilder.addRange(LangOpts->CommentOpts.BlockCommandNames); // Extend the signature with the target options. 
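// Sketch of what feeds the module hash around here (summary, not part of
// this patch): the compiler and serialization versions, the non-benign
// language options above, then the target, preprocessor, and header-search
// state below; the final MD5 is folded to 64 bits and printed in base 36.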
HBuilder.add(TargetOpts->Triple, TargetOpts->CPU, TargetOpts->TuneCPU, TargetOpts->ABI); HBuilder.addRange(TargetOpts->FeaturesAsWritten); // Extend the signature with preprocessor options. const PreprocessorOptions &ppOpts = getPreprocessorOpts(); HBuilder.add(ppOpts.UsePredefines, ppOpts.DetailedRecord); const HeaderSearchOptions &hsOpts = getHeaderSearchOpts(); for (const auto &Macro : getPreprocessorOpts().Macros) { // If we're supposed to ignore this macro for the purposes of modules, // don't put it into the hash. if (!hsOpts.ModulesIgnoreMacros.empty()) { // Check whether we're ignoring this macro. StringRef MacroDef = Macro.first; if (hsOpts.ModulesIgnoreMacros.count( llvm::CachedHashString(MacroDef.split('=').first))) continue; } HBuilder.add(Macro); } // Extend the signature with the sysroot and other header search options. HBuilder.add(hsOpts.Sysroot, hsOpts.ModuleFormat, hsOpts.UseDebugInfo, hsOpts.UseBuiltinIncludes, hsOpts.UseStandardSystemIncludes, hsOpts.UseStandardCXXIncludes, hsOpts.UseLibcxx, hsOpts.ModulesValidateDiagnosticOptions); HBuilder.add(hsOpts.ResourceDir); if (hsOpts.ModulesStrictContextHash) { HBuilder.addRange(hsOpts.SystemHeaderPrefixes); HBuilder.addRange(hsOpts.UserEntries); const DiagnosticOptions &diagOpts = getDiagnosticOpts(); #define DIAGOPT(Name, Bits, Default) HBuilder.add(diagOpts.Name); #define ENUM_DIAGOPT(Name, Type, Bits, Default) \ HBuilder.add(diagOpts.get##Name()); #include "clang/Basic/DiagnosticOptions.def" #undef DIAGOPT #undef ENUM_DIAGOPT } // Extend the signature with the user build path. HBuilder.add(hsOpts.ModuleUserBuildPath); // Extend the signature with the module file extensions. for (const auto &ext : getFrontendOpts().ModuleFileExtensions) ext->hashExtension(HBuilder); // When compiling with -gmodules, also hash -fdebug-prefix-map as it // affects the debug info in the PCM. if (getCodeGenOpts().DebugTypeExtRefs) HBuilder.addRange(getCodeGenOpts().DebugPrefixMap); // Extend the signature with the enabled sanitizers, if at least one is // enabled. Sanitizers which cannot affect AST generation aren't hashed. 
SanitizerSet SanHash = LangOpts->Sanitize; SanHash.clear(getPPTransparentSanitizers()); if (!SanHash.empty()) HBuilder.add(SanHash.Mask); llvm::MD5::MD5Result Result; HBuilder.getHasher().final(Result); uint64_t Hash = Result.high() ^ Result.low(); return toString(llvm::APInt(64, Hash), 36, /*Signed=*/false); } void CompilerInvocation::generateCC1CommandLine( SmallVectorImpl &Args, StringAllocator SA) const { llvm::Triple T(TargetOpts->Triple); GenerateFileSystemArgs(FileSystemOpts, Args, SA); GenerateMigratorArgs(MigratorOpts, Args, SA); GenerateAnalyzerArgs(*AnalyzerOpts, Args, SA); GenerateDiagnosticArgs(*DiagnosticOpts, Args, SA, false); GenerateFrontendArgs(FrontendOpts, Args, SA, LangOpts->IsHeaderFile); GenerateTargetArgs(*TargetOpts, Args, SA); GenerateHeaderSearchArgs(*HeaderSearchOpts, Args, SA); GenerateLangArgs(*LangOpts, Args, SA, T, FrontendOpts.DashX); GenerateCodeGenArgs(CodeGenOpts, Args, SA, T, FrontendOpts.OutputFile, &*LangOpts); GeneratePreprocessorArgs(*PreprocessorOpts, Args, SA, *LangOpts, FrontendOpts, CodeGenOpts); GeneratePreprocessorOutputArgs(PreprocessorOutputOpts, Args, SA, FrontendOpts.ProgramAction); GenerateDependencyOutputArgs(DependencyOutputOpts, Args, SA); } IntrusiveRefCntPtr clang::createVFSFromCompilerInvocation(const CompilerInvocation &CI, DiagnosticsEngine &Diags) { return createVFSFromCompilerInvocation(CI, Diags, llvm::vfs::getRealFileSystem()); } IntrusiveRefCntPtr clang::createVFSFromCompilerInvocation( const CompilerInvocation &CI, DiagnosticsEngine &Diags, IntrusiveRefCntPtr BaseFS) { if (CI.getHeaderSearchOpts().VFSOverlayFiles.empty()) return BaseFS; IntrusiveRefCntPtr Result = BaseFS; // earlier vfs files are on the bottom for (const auto &File : CI.getHeaderSearchOpts().VFSOverlayFiles) { llvm::ErrorOr> Buffer = Result->getBufferForFile(File); if (!Buffer) { Diags.Report(diag::err_missing_vfs_overlay_file) << File; continue; } IntrusiveRefCntPtr FS = llvm::vfs::getVFSFromYAML( std::move(Buffer.get()), /*DiagHandler*/ nullptr, File, /*DiagContext*/ nullptr, Result); if (!FS) { Diags.Report(diag::err_invalid_vfs_overlay) << File; continue; } Result = FS; } return Result; } diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 1da0dfec3f23..467372c71496 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -1,6366 +1,6368 @@ //===--- SemaTemplateInstantiateDecl.cpp - C++ Template Decl Instantiation ===/ // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception //===----------------------------------------------------------------------===/ // // This file implements C++ template instantiation for declarations. 
// //===----------------------------------------------------------------------===/ #include "TreeTransform.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTMutationListener.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclVisitor.h" #include "clang/AST/DependentDiagnostic.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/PrettyDeclStackTrace.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaInternal.h" #include "clang/Sema/Template.h" #include "clang/Sema/TemplateInstCallback.h" #include "llvm/Support/TimeProfiler.h" using namespace clang; static bool isDeclWithinFunction(const Decl *D) { const DeclContext *DC = D->getDeclContext(); if (DC->isFunctionOrMethod()) return true; if (DC->isRecord()) return cast(DC)->isLocalClass(); return false; } template static bool SubstQualifier(Sema &SemaRef, const DeclT *OldDecl, DeclT *NewDecl, const MultiLevelTemplateArgumentList &TemplateArgs) { if (!OldDecl->getQualifierLoc()) return false; assert((NewDecl->getFriendObjectKind() || !OldDecl->getLexicalDeclContext()->isDependentContext()) && "non-friend with qualified name defined in dependent context"); Sema::ContextRAII SavedContext( SemaRef, const_cast(NewDecl->getFriendObjectKind() ? NewDecl->getLexicalDeclContext() : OldDecl->getLexicalDeclContext())); NestedNameSpecifierLoc NewQualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(OldDecl->getQualifierLoc(), TemplateArgs); if (!NewQualifierLoc) return true; NewDecl->setQualifierInfo(NewQualifierLoc); return false; } bool TemplateDeclInstantiator::SubstQualifier(const DeclaratorDecl *OldDecl, DeclaratorDecl *NewDecl) { return ::SubstQualifier(SemaRef, OldDecl, NewDecl, TemplateArgs); } bool TemplateDeclInstantiator::SubstQualifier(const TagDecl *OldDecl, TagDecl *NewDecl) { return ::SubstQualifier(SemaRef, OldDecl, NewDecl, TemplateArgs); } // Include attribute instantiation code. #include "clang/Sema/AttrTemplateInstantiate.inc" static void instantiateDependentAlignedAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AlignedAttr *Aligned, Decl *New, bool IsPackExpansion) { if (Aligned->isAlignmentExpr()) { // The alignment expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(Aligned->getAlignmentExpr(), TemplateArgs); if (!Result.isInvalid()) S.AddAlignedAttr(New, *Aligned, Result.getAs(), IsPackExpansion); } else { TypeSourceInfo *Result = S.SubstType(Aligned->getAlignmentType(), TemplateArgs, Aligned->getLocation(), DeclarationName()); if (Result) S.AddAlignedAttr(New, *Aligned, Result, IsPackExpansion); } } static void instantiateDependentAlignedAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AlignedAttr *Aligned, Decl *New) { if (!Aligned->isPackExpansion()) { instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, false); return; } SmallVector Unexpanded; if (Aligned->isAlignmentExpr()) S.collectUnexpandedParameterPacks(Aligned->getAlignmentExpr(), Unexpanded); else S.collectUnexpandedParameterPacks(Aligned->getAlignmentType()->getTypeLoc(), Unexpanded); assert(!Unexpanded.empty() && "Pack expansion without parameter packs?"); // Determine whether we can expand this attribute pack yet. 
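// Illustrative trigger for the pack-expansion path below (sketch, not part
// of this patch): an alignment specifier that expands a parameter pack,
//   template <typename... Ts> struct alignas(Ts...) Padded {};
// Instantiating Padded<int, double> runs this logic once per pack element,
// producing one AlignedAttr for each element of Ts.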
bool Expand = true, RetainExpansion = false; Optional NumExpansions; // FIXME: Use the actual location of the ellipsis. SourceLocation EllipsisLoc = Aligned->getLocation(); if (S.CheckParameterPacksForExpansion(EllipsisLoc, Aligned->getRange(), Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpansions)) return; if (!Expand) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, -1); instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, true); } else { for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, I); instantiateDependentAlignedAttr(S, TemplateArgs, Aligned, New, false); } } } static void instantiateDependentAssumeAlignedAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AssumeAlignedAttr *Aligned, Decl *New) { // The alignment expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); Expr *E, *OE = nullptr; ExprResult Result = S.SubstExpr(Aligned->getAlignment(), TemplateArgs); if (Result.isInvalid()) return; E = Result.getAs(); if (Aligned->getOffset()) { Result = S.SubstExpr(Aligned->getOffset(), TemplateArgs); if (Result.isInvalid()) return; OE = Result.getAs(); } S.AddAssumeAlignedAttr(New, *Aligned, E, OE); } static void instantiateDependentAlignValueAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AlignValueAttr *Aligned, Decl *New) { // The alignment expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(Aligned->getAlignment(), TemplateArgs); if (!Result.isInvalid()) S.AddAlignValueAttr(New, *Aligned, Result.getAs()); } static void instantiateDependentAllocAlignAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AllocAlignAttr *Align, Decl *New) { Expr *Param = IntegerLiteral::Create( S.getASTContext(), llvm::APInt(64, Align->getParamIndex().getSourceIndex()), S.getASTContext().UnsignedLongLongTy, Align->getLocation()); S.AddAllocAlignAttr(New, *Align, Param); } static void instantiateDependentAnnotationAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AnnotateAttr *Attr, Decl *New) { EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); SmallVector Args; Args.reserve(Attr->args_size()); for (auto *E : Attr->args()) { ExprResult Result = S.SubstExpr(E, TemplateArgs); if (!Result.isUsable()) return; Args.push_back(Result.get()); } S.AddAnnotationAttr(New, *Attr, Attr->getAnnotation(), Args); } static Expr *instantiateDependentFunctionAttrCondition( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const Attr *A, Expr *OldCond, const Decl *Tmpl, FunctionDecl *New) { Expr *Cond = nullptr; { Sema::ContextRAII SwitchContext(S, New); EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(OldCond, TemplateArgs); if (Result.isInvalid()) return nullptr; Cond = Result.getAs(); } if (!Cond->isTypeDependent()) { ExprResult Converted = S.PerformContextuallyConvertToBool(Cond); if (Converted.isInvalid()) return nullptr; Cond = Converted.get(); } SmallVector Diags; if (OldCond->isValueDependent() && !Cond->isValueDependent() && !Expr::isPotentialConstantExprUnevaluated(Cond, New, Diags)) { S.Diag(A->getLocation(), diag::err_attr_cond_never_constant_expr) << A; for (const auto &P : Diags) 
S.Diag(P.first, P.second); return nullptr; } return Cond; } static void instantiateDependentEnableIfAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const EnableIfAttr *EIA, const Decl *Tmpl, FunctionDecl *New) { Expr *Cond = instantiateDependentFunctionAttrCondition( S, TemplateArgs, EIA, EIA->getCond(), Tmpl, New); if (Cond) New->addAttr(new (S.getASTContext()) EnableIfAttr(S.getASTContext(), *EIA, Cond, EIA->getMessage())); } static void instantiateDependentDiagnoseIfAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const DiagnoseIfAttr *DIA, const Decl *Tmpl, FunctionDecl *New) { Expr *Cond = instantiateDependentFunctionAttrCondition( S, TemplateArgs, DIA, DIA->getCond(), Tmpl, New); if (Cond) New->addAttr(new (S.getASTContext()) DiagnoseIfAttr( S.getASTContext(), *DIA, Cond, DIA->getMessage(), DIA->getDiagnosticType(), DIA->getArgDependent(), New)); } // Constructs and adds to New a new instance of CUDALaunchBoundsAttr using // template A as the base and arguments from TemplateArgs. static void instantiateDependentCUDALaunchBoundsAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const CUDALaunchBoundsAttr &Attr, Decl *New) { // The alignment expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(Attr.getMaxThreads(), TemplateArgs); if (Result.isInvalid()) return; Expr *MaxThreads = Result.getAs(); Expr *MinBlocks = nullptr; if (Attr.getMinBlocks()) { Result = S.SubstExpr(Attr.getMinBlocks(), TemplateArgs); if (Result.isInvalid()) return; MinBlocks = Result.getAs(); } S.AddLaunchBoundsAttr(New, Attr, MaxThreads, MinBlocks); } static void instantiateDependentModeAttr(Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const ModeAttr &Attr, Decl *New) { S.AddModeAttr(New, Attr, Attr.getMode(), /*InInstantiation=*/true); } /// Instantiation of 'declare simd' attribute and its arguments. static void instantiateOMPDeclareSimdDeclAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const OMPDeclareSimdDeclAttr &Attr, Decl *New) { // Allow 'this' in clauses with varlists. if (auto *FTD = dyn_cast(New)) New = FTD->getTemplatedDecl(); auto *FD = cast(New); auto *ThisContext = dyn_cast_or_null(FD->getDeclContext()); SmallVector Uniforms, Aligneds, Alignments, Linears, Steps; SmallVector LinModifiers; auto SubstExpr = [&](Expr *E) -> ExprResult { if (auto *DRE = dyn_cast(E->IgnoreParenImpCasts())) if (auto *PVD = dyn_cast(DRE->getDecl())) { Sema::ContextRAII SavedContext(S, FD); LocalInstantiationScope Local(S); if (FD->getNumParams() > PVD->getFunctionScopeIndex()) Local.InstantiatedLocal( PVD, FD->getParamDecl(PVD->getFunctionScopeIndex())); return S.SubstExpr(E, TemplateArgs); } Sema::CXXThisScopeRAII ThisScope(S, ThisContext, Qualifiers(), FD->isCXXInstanceMember()); return S.SubstExpr(E, TemplateArgs); }; // Substitute a single OpenMP clause, which is a potentially-evaluated // full-expression. 
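// Illustrative use of the dependent 'declare simd' clauses handled below
// (sketch, not part of this patch):
//   template <int N> struct Kernel {
//     #pragma omp declare simd simdlen(N) aligned(p : N)
//     static void step(float *p);
//   };
// Instantiating Kernel<8> substitutes N into simdlen/aligned before the
// directive is re-built on the instantiated declaration.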
auto Subst = [&](Expr *E) -> ExprResult { EnterExpressionEvaluationContext Evaluated( S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); ExprResult Res = SubstExpr(E); if (Res.isInvalid()) return Res; return S.ActOnFinishFullExpr(Res.get(), false); }; ExprResult Simdlen; if (auto *E = Attr.getSimdlen()) Simdlen = Subst(E); if (Attr.uniforms_size() > 0) { for(auto *E : Attr.uniforms()) { ExprResult Inst = Subst(E); if (Inst.isInvalid()) continue; Uniforms.push_back(Inst.get()); } } auto AI = Attr.alignments_begin(); for (auto *E : Attr.aligneds()) { ExprResult Inst = Subst(E); if (Inst.isInvalid()) continue; Aligneds.push_back(Inst.get()); Inst = ExprEmpty(); if (*AI) Inst = S.SubstExpr(*AI, TemplateArgs); Alignments.push_back(Inst.get()); ++AI; } auto SI = Attr.steps_begin(); for (auto *E : Attr.linears()) { ExprResult Inst = Subst(E); if (Inst.isInvalid()) continue; Linears.push_back(Inst.get()); Inst = ExprEmpty(); if (*SI) Inst = S.SubstExpr(*SI, TemplateArgs); Steps.push_back(Inst.get()); ++SI; } LinModifiers.append(Attr.modifiers_begin(), Attr.modifiers_end()); (void)S.ActOnOpenMPDeclareSimdDirective( S.ConvertDeclToDeclGroup(New), Attr.getBranchState(), Simdlen.get(), Uniforms, Aligneds, Alignments, Linears, LinModifiers, Steps, Attr.getRange()); } /// Instantiation of 'declare variant' attribute and its arguments. static void instantiateOMPDeclareVariantAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const OMPDeclareVariantAttr &Attr, Decl *New) { // Allow 'this' in clauses with varlists. if (auto *FTD = dyn_cast(New)) New = FTD->getTemplatedDecl(); auto *FD = cast(New); auto *ThisContext = dyn_cast_or_null(FD->getDeclContext()); auto &&SubstExpr = [FD, ThisContext, &S, &TemplateArgs](Expr *E) { if (auto *DRE = dyn_cast(E->IgnoreParenImpCasts())) if (auto *PVD = dyn_cast(DRE->getDecl())) { Sema::ContextRAII SavedContext(S, FD); LocalInstantiationScope Local(S); if (FD->getNumParams() > PVD->getFunctionScopeIndex()) Local.InstantiatedLocal( PVD, FD->getParamDecl(PVD->getFunctionScopeIndex())); return S.SubstExpr(E, TemplateArgs); } Sema::CXXThisScopeRAII ThisScope(S, ThisContext, Qualifiers(), FD->isCXXInstanceMember()); return S.SubstExpr(E, TemplateArgs); }; // Substitute a single OpenMP clause, which is a potentially-evaluated // full-expression. auto &&Subst = [&SubstExpr, &S](Expr *E) { EnterExpressionEvaluationContext Evaluated( S, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); ExprResult Res = SubstExpr(E); if (Res.isInvalid()) return Res; return S.ActOnFinishFullExpr(Res.get(), false); }; ExprResult VariantFuncRef; if (Expr *E = Attr.getVariantFuncRef()) { // Do not mark function as is used to prevent its emission if this is the // only place where it is used. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); VariantFuncRef = Subst(E); } // Copy the template version of the OMPTraitInfo and run substitute on all // score and condition expressiosn. OMPTraitInfo &TI = S.getASTContext().getNewOMPTraitInfo(); TI = *Attr.getTraitInfos(); // Try to substitute template parameters in score and condition expressions. 
auto SubstScoreOrConditionExpr = [&S, Subst](Expr *&E, bool) { if (E) { EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult ER = Subst(E); if (ER.isUsable()) E = ER.get(); else return true; } return false; }; if (TI.anyScoreOrCondition(SubstScoreOrConditionExpr)) return; Expr *E = VariantFuncRef.get(); // Check function/variant ref for `omp declare variant` but not for `omp // begin declare variant` (which use implicit attributes). Optional> DeclVarData = S.checkOpenMPDeclareVariantFunction(S.ConvertDeclToDeclGroup(New), E, TI, Attr.appendArgs_size(), Attr.getRange()); if (!DeclVarData) return; E = DeclVarData.getValue().second; FD = DeclVarData.getValue().first; if (auto *VariantDRE = dyn_cast(E->IgnoreParenImpCasts())) { if (auto *VariantFD = dyn_cast(VariantDRE->getDecl())) { if (auto *VariantFTD = VariantFD->getDescribedFunctionTemplate()) { if (!VariantFTD->isThisDeclarationADefinition()) return; Sema::TentativeAnalysisScope Trap(S); const TemplateArgumentList *TAL = TemplateArgumentList::CreateCopy( S.Context, TemplateArgs.getInnermost()); auto *SubstFD = S.InstantiateFunctionDeclaration(VariantFTD, TAL, New->getLocation()); if (!SubstFD) return; QualType NewType = S.Context.mergeFunctionTypes( SubstFD->getType(), FD->getType(), /* OfBlockPointer */ false, /* Unqualified */ false, /* AllowCXX */ true); if (NewType.isNull()) return; S.InstantiateFunctionDefinition( New->getLocation(), SubstFD, /* Recursive */ true, /* DefinitionRequired */ false, /* AtEndOfTU */ false); SubstFD->setInstantiationIsPending(!SubstFD->isDefined()); E = DeclRefExpr::Create(S.Context, NestedNameSpecifierLoc(), SourceLocation(), SubstFD, /* RefersToEnclosingVariableOrCapture */ false, /* NameLoc */ SubstFD->getLocation(), SubstFD->getType(), ExprValueKind::VK_PRValue); } } } SmallVector NothingExprs; SmallVector NeedDevicePtrExprs; SmallVector AppendArgs; for (Expr *E : Attr.adjustArgsNothing()) { ExprResult ER = Subst(E); if (ER.isInvalid()) continue; NothingExprs.push_back(ER.get()); } for (Expr *E : Attr.adjustArgsNeedDevicePtr()) { ExprResult ER = Subst(E); if (ER.isInvalid()) continue; NeedDevicePtrExprs.push_back(ER.get()); } for (auto A : Attr.appendArgs()) AppendArgs.push_back(A); S.ActOnOpenMPDeclareVariantDirective( FD, E, TI, NothingExprs, NeedDevicePtrExprs, AppendArgs, SourceLocation(), SourceLocation(), Attr.getRange()); } static void instantiateDependentAMDGPUFlatWorkGroupSizeAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AMDGPUFlatWorkGroupSizeAttr &Attr, Decl *New) { // Both min and max expression are constant expressions. 
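// Illustrative dependent form of this attribute (sketch, not part of this
// patch):
//   template <int Min, int Max>
//   __attribute__((amdgpu_flat_work_group_size(Min, Max)))
//   void kernel();
// Min and Max are substituted below before the attribute is re-added.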
EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs); if (Result.isInvalid()) return; Expr *MinExpr = Result.getAs(); Result = S.SubstExpr(Attr.getMax(), TemplateArgs); if (Result.isInvalid()) return; Expr *MaxExpr = Result.getAs(); S.addAMDGPUFlatWorkGroupSizeAttr(New, Attr, MinExpr, MaxExpr); } static ExplicitSpecifier instantiateExplicitSpecifier(Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, ExplicitSpecifier ES, FunctionDecl *New) { if (!ES.getExpr()) return ES; Expr *OldCond = ES.getExpr(); Expr *Cond = nullptr; { EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult SubstResult = S.SubstExpr(OldCond, TemplateArgs); if (SubstResult.isInvalid()) { return ExplicitSpecifier::Invalid(); } Cond = SubstResult.get(); } ExplicitSpecifier Result(Cond, ES.getKind()); if (!Cond->isTypeDependent()) S.tryResolveExplicitSpecifier(Result); return Result; } static void instantiateDependentAMDGPUWavesPerEUAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const AMDGPUWavesPerEUAttr &Attr, Decl *New) { // Both min and max expression are constant expressions. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs); if (Result.isInvalid()) return; Expr *MinExpr = Result.getAs(); Expr *MaxExpr = nullptr; if (auto Max = Attr.getMax()) { Result = S.SubstExpr(Max, TemplateArgs); if (Result.isInvalid()) return; MaxExpr = Result.getAs(); } S.addAMDGPUWavesPerEUAttr(New, Attr, MinExpr, MaxExpr); } // This doesn't take any template parameters, but we have a custom action that // needs to happen when the kernel itself is instantiated. We need to run the // ItaniumMangler to mark the names required to name this kernel. static void instantiateDependentSYCLKernelAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const SYCLKernelAttr &Attr, Decl *New) { New->addAttr(Attr.clone(S.getASTContext())); } /// Determine whether the attribute A might be relevant to the declaration D. /// If not, we can skip instantiating it. The attribute may or may not have /// been instantiated yet. static bool isRelevantAttr(Sema &S, const Decl *D, const Attr *A) { // 'preferred_name' is only relevant to the matching specialization of the // template. if (const auto *PNA = dyn_cast(A)) { QualType T = PNA->getTypedefType(); const auto *RD = cast(D); if (!T->isDependentType() && !RD->isDependentContext() && !declaresSameEntity(T->getAsCXXRecordDecl(), RD)) return false; for (const auto *ExistingPNA : D->specific_attrs()) if (S.Context.hasSameType(ExistingPNA->getTypedefType(), PNA->getTypedefType())) return false; return true; } return true; } void Sema::InstantiateAttrsForDecl( const MultiLevelTemplateArgumentList &TemplateArgs, const Decl *Tmpl, Decl *New, LateInstantiatedAttrVec *LateAttrs, LocalInstantiationScope *OuterMostScope) { if (NamedDecl *ND = dyn_cast(New)) { // FIXME: This function is called multiple times for the same template // specialization. We should only instantiate attributes that were added // since the previous instantiation. for (const auto *TmplAttr : Tmpl->attrs()) { if (!isRelevantAttr(*this, New, TmplAttr)) continue; // FIXME: If any of the special case versions from InstantiateAttrs become // applicable to template declaration, we'll need to add them here. 
CXXThisScopeRAII ThisScope( *this, dyn_cast_or_null(ND->getDeclContext()), Qualifiers(), ND->isCXXInstanceMember()); Attr *NewAttr = sema::instantiateTemplateAttributeForDecl( TmplAttr, Context, *this, TemplateArgs); if (NewAttr && isRelevantAttr(*this, New, NewAttr)) New->addAttr(NewAttr); } } } static Sema::RetainOwnershipKind attrToRetainOwnershipKind(const Attr *A) { switch (A->getKind()) { case clang::attr::CFConsumed: return Sema::RetainOwnershipKind::CF; case clang::attr::OSConsumed: return Sema::RetainOwnershipKind::OS; case clang::attr::NSConsumed: return Sema::RetainOwnershipKind::NS; default: llvm_unreachable("Wrong argument supplied"); } } void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, const Decl *Tmpl, Decl *New, LateInstantiatedAttrVec *LateAttrs, LocalInstantiationScope *OuterMostScope) { for (const auto *TmplAttr : Tmpl->attrs()) { if (!isRelevantAttr(*this, New, TmplAttr)) continue; // FIXME: This should be generalized to more than just the AlignedAttr. const AlignedAttr *Aligned = dyn_cast(TmplAttr); if (Aligned && Aligned->isAlignmentDependent()) { instantiateDependentAlignedAttr(*this, TemplateArgs, Aligned, New); continue; } if (const auto *AssumeAligned = dyn_cast(TmplAttr)) { instantiateDependentAssumeAlignedAttr(*this, TemplateArgs, AssumeAligned, New); continue; } if (const auto *AlignValue = dyn_cast(TmplAttr)) { instantiateDependentAlignValueAttr(*this, TemplateArgs, AlignValue, New); continue; } if (const auto *AllocAlign = dyn_cast(TmplAttr)) { instantiateDependentAllocAlignAttr(*this, TemplateArgs, AllocAlign, New); continue; } if (const auto *Annotate = dyn_cast(TmplAttr)) { instantiateDependentAnnotationAttr(*this, TemplateArgs, Annotate, New); continue; } if (const auto *EnableIf = dyn_cast(TmplAttr)) { instantiateDependentEnableIfAttr(*this, TemplateArgs, EnableIf, Tmpl, cast(New)); continue; } if (const auto *DiagnoseIf = dyn_cast(TmplAttr)) { instantiateDependentDiagnoseIfAttr(*this, TemplateArgs, DiagnoseIf, Tmpl, cast(New)); continue; } if (const auto *CUDALaunchBounds = dyn_cast(TmplAttr)) { instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs, *CUDALaunchBounds, New); continue; } if (const auto *Mode = dyn_cast(TmplAttr)) { instantiateDependentModeAttr(*this, TemplateArgs, *Mode, New); continue; } if (const auto *OMPAttr = dyn_cast(TmplAttr)) { instantiateOMPDeclareSimdDeclAttr(*this, TemplateArgs, *OMPAttr, New); continue; } if (const auto *OMPAttr = dyn_cast(TmplAttr)) { instantiateOMPDeclareVariantAttr(*this, TemplateArgs, *OMPAttr, New); continue; } if (const auto *AMDGPUFlatWorkGroupSize = dyn_cast(TmplAttr)) { instantiateDependentAMDGPUFlatWorkGroupSizeAttr( *this, TemplateArgs, *AMDGPUFlatWorkGroupSize, New); } if (const auto *AMDGPUFlatWorkGroupSize = dyn_cast(TmplAttr)) { instantiateDependentAMDGPUWavesPerEUAttr(*this, TemplateArgs, *AMDGPUFlatWorkGroupSize, New); } // Existing DLL attribute on the instantiation takes precedence. 
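// Illustrative case for the precedence rule below (hypothetical sketch, not
// part of this patch):
//   template <typename T> struct __declspec(dllexport) S { void f(); };
//   template struct __declspec(dllimport) S<int>;
// The explicit instantiation's own dllimport wins, so the template's
// dllexport is not copied onto S<int>.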
if (TmplAttr->getKind() == attr::DLLExport || TmplAttr->getKind() == attr::DLLImport) { if (New->hasAttr() || New->hasAttr()) { continue; } } if (const auto *ABIAttr = dyn_cast(TmplAttr)) { AddParameterABIAttr(New, *ABIAttr, ABIAttr->getABI()); continue; } if (isa(TmplAttr) || isa(TmplAttr) || isa(TmplAttr)) { AddXConsumedAttr(New, *TmplAttr, attrToRetainOwnershipKind(TmplAttr), /*template instantiation=*/true); continue; } if (auto *A = dyn_cast(TmplAttr)) { if (!New->hasAttr()) New->addAttr(A->clone(Context)); continue; } if (auto *A = dyn_cast(TmplAttr)) { if (!New->hasAttr()) New->addAttr(A->clone(Context)); continue; } if (auto *A = dyn_cast(TmplAttr)) { instantiateDependentSYCLKernelAttr(*this, TemplateArgs, *A, New); continue; } assert(!TmplAttr->isPackExpansion()); if (TmplAttr->isLateParsed() && LateAttrs) { // Late parsed attributes must be instantiated and attached after the // enclosing class has been instantiated. See Sema::InstantiateClass. LocalInstantiationScope *Saved = nullptr; if (CurrentInstantiationScope) Saved = CurrentInstantiationScope->cloneScopes(OuterMostScope); LateAttrs->push_back(LateInstantiatedAttribute(TmplAttr, Saved, New)); } else { // Allow 'this' within late-parsed attributes. auto *ND = cast(New); auto *ThisContext = dyn_cast_or_null(ND->getDeclContext()); CXXThisScopeRAII ThisScope(*this, ThisContext, Qualifiers(), ND->isCXXInstanceMember()); Attr *NewAttr = sema::instantiateTemplateAttribute(TmplAttr, Context, *this, TemplateArgs); if (NewAttr && isRelevantAttr(*this, New, TmplAttr)) New->addAttr(NewAttr); } } } /// In the MS ABI, we need to instantiate default arguments of dllexported /// default constructors along with the constructor definition. This allows IR /// gen to emit a constructor closure which calls the default constructor with /// its default arguments. void Sema::InstantiateDefaultCtorDefaultArgs(CXXConstructorDecl *Ctor) { assert(Context.getTargetInfo().getCXXABI().isMicrosoft() && Ctor->isDefaultConstructor()); unsigned NumParams = Ctor->getNumParams(); if (NumParams == 0) return; DLLExportAttr *Attr = Ctor->getAttr(); if (!Attr) return; for (unsigned I = 0; I != NumParams; ++I) { (void)CheckCXXDefaultArgExpr(Attr->getLocation(), Ctor, Ctor->getParamDecl(I)); DiscardCleanupsInEvaluationContext(); } } /// Get the previous declaration of a declaration for the purposes of template /// instantiation. If this finds a previous declaration, then the previous /// declaration of the instantiation of D should be an instantiation of the /// result of this function. template static DeclT *getPreviousDeclForInstantiation(DeclT *D) { DeclT *Result = D->getPreviousDecl(); // If the declaration is within a class, and the previous declaration was // merged from a different definition of that class, then we don't have a // previous declaration for the purpose of template instantiation. 
if (Result && isa(D->getDeclContext()) && D->getLexicalDeclContext() != Result->getLexicalDeclContext()) return nullptr; return Result; } Decl * TemplateDeclInstantiator::VisitTranslationUnitDecl(TranslationUnitDecl *D) { llvm_unreachable("Translation units cannot be instantiated"); } Decl * TemplateDeclInstantiator::VisitPragmaCommentDecl(PragmaCommentDecl *D) { llvm_unreachable("pragma comment cannot be instantiated"); } Decl *TemplateDeclInstantiator::VisitPragmaDetectMismatchDecl( PragmaDetectMismatchDecl *D) { llvm_unreachable("pragma comment cannot be instantiated"); } Decl * TemplateDeclInstantiator::VisitExternCContextDecl(ExternCContextDecl *D) { llvm_unreachable("extern \"C\" context cannot be instantiated"); } Decl *TemplateDeclInstantiator::VisitMSGuidDecl(MSGuidDecl *D) { llvm_unreachable("GUID declaration cannot be instantiated"); } Decl *TemplateDeclInstantiator::VisitTemplateParamObjectDecl( TemplateParamObjectDecl *D) { llvm_unreachable("template parameter objects cannot be instantiated"); } Decl * TemplateDeclInstantiator::VisitLabelDecl(LabelDecl *D) { LabelDecl *Inst = LabelDecl::Create(SemaRef.Context, Owner, D->getLocation(), D->getIdentifier()); Owner->addDecl(Inst); return Inst; } Decl * TemplateDeclInstantiator::VisitNamespaceDecl(NamespaceDecl *D) { llvm_unreachable("Namespaces cannot be instantiated"); } Decl * TemplateDeclInstantiator::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) { NamespaceAliasDecl *Inst = NamespaceAliasDecl::Create(SemaRef.Context, Owner, D->getNamespaceLoc(), D->getAliasLoc(), D->getIdentifier(), D->getQualifierLoc(), D->getTargetNameLoc(), D->getNamespace()); Owner->addDecl(Inst); return Inst; } Decl *TemplateDeclInstantiator::InstantiateTypedefNameDecl(TypedefNameDecl *D, bool IsTypeAlias) { bool Invalid = false; TypeSourceInfo *DI = D->getTypeSourceInfo(); if (DI->getType()->isInstantiationDependentType() || DI->getType()->isVariablyModifiedType()) { DI = SemaRef.SubstType(DI, TemplateArgs, D->getLocation(), D->getDeclName()); if (!DI) { Invalid = true; DI = SemaRef.Context.getTrivialTypeSourceInfo(SemaRef.Context.IntTy); } } else { SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType()); } // HACK: 2012-10-23 g++ has a bug where it gets the value kind of ?: wrong. // libstdc++ relies upon this bug in its implementation of common_type. If we // happen to be processing that implementation, fake up the g++ ?: // semantics. See LWG issue 2141 for more information on the bug. The bugs // are fixed in g++ and libstdc++ 4.9.0 (2014-04-22). const DecltypeType *DT = DI->getType()->getAs(); CXXRecordDecl *RD = dyn_cast(D->getDeclContext()); if (DT && RD && isa(DT->getUnderlyingExpr()) && DT->isReferenceType() && RD->getEnclosingNamespaceContext() == SemaRef.getStdNamespace() && RD->getIdentifier() && RD->getIdentifier()->isStr("common_type") && D->getIdentifier() && D->getIdentifier()->isStr("type") && SemaRef.getSourceManager().isInSystemHeader(D->getBeginLoc())) // Fold it to the (non-reference) type which g++ would have produced. 
    DI = SemaRef.Context.getTrivialTypeSourceInfo(
        DI->getType().getNonReferenceType());

  // Create the new typedef
  TypedefNameDecl *Typedef;
  if (IsTypeAlias)
    Typedef = TypeAliasDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(),
                                    D->getLocation(), D->getIdentifier(), DI);
  else
    Typedef = TypedefDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(),
                                  D->getLocation(), D->getIdentifier(), DI);
  if (Invalid)
    Typedef->setInvalidDecl();

  // If the old typedef was the name for linkage purposes of an anonymous
  // tag decl, re-establish that relationship for the new typedef.
  if (const TagType *oldTagType = D->getUnderlyingType()->getAs<TagType>()) {
    TagDecl *oldTag = oldTagType->getDecl();
    if (oldTag->getTypedefNameForAnonDecl() == D && !Invalid) {
      TagDecl *newTag = DI->getType()->castAs<TagType>()->getDecl();
      assert(!newTag->hasNameForLinkage());
      newTag->setTypedefNameForAnonDecl(Typedef);
    }
  }

  if (TypedefNameDecl *Prev = getPreviousDeclForInstantiation(D)) {
    NamedDecl *InstPrev = SemaRef.FindInstantiatedDecl(D->getLocation(), Prev,
                                                       TemplateArgs);
    if (!InstPrev)
      return nullptr;

    TypedefNameDecl *InstPrevTypedef = cast<TypedefNameDecl>(InstPrev);

    // If the typedef types are not identical, reject them.
    SemaRef.isIncompatibleTypedef(InstPrevTypedef, Typedef);

    Typedef->setPreviousDecl(InstPrevTypedef);
  }

  SemaRef.InstantiateAttrs(TemplateArgs, D, Typedef);

  if (D->getUnderlyingType()->getAs<DependentNameType>())
    SemaRef.inferGslPointerAttribute(Typedef);

  Typedef->setAccess(D->getAccess());

  return Typedef;
}

Decl *TemplateDeclInstantiator::VisitTypedefDecl(TypedefDecl *D) {
  Decl *Typedef = InstantiateTypedefNameDecl(D, /*IsTypeAlias=*/false);
  if (Typedef)
    Owner->addDecl(Typedef);
  return Typedef;
}

Decl *TemplateDeclInstantiator::VisitTypeAliasDecl(TypeAliasDecl *D) {
  Decl *Typedef = InstantiateTypedefNameDecl(D, /*IsTypeAlias=*/true);
  if (Typedef)
    Owner->addDecl(Typedef);
  return Typedef;
}

Decl *
TemplateDeclInstantiator::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) {
  // Create a local instantiation scope for this type alias template, which
  // will contain the instantiations of the template parameters.
  LocalInstantiationScope Scope(SemaRef);

  TemplateParameterList *TempParams = D->getTemplateParameters();
  TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
  if (!InstParams)
    return nullptr;

  TypeAliasDecl *Pattern = D->getTemplatedDecl();

  TypeAliasTemplateDecl *PrevAliasTemplate = nullptr;
  if (getPreviousDeclForInstantiation<TypedefNameDecl>(Pattern)) {
    DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName());
    if (!Found.empty()) {
      PrevAliasTemplate = dyn_cast<TypeAliasTemplateDecl>(Found.front());
    }
  }

  TypeAliasDecl *AliasInst = cast_or_null<TypeAliasDecl>(
      InstantiateTypedefNameDecl(Pattern, /*IsTypeAlias=*/true));
  if (!AliasInst)
    return nullptr;

  TypeAliasTemplateDecl *Inst =
      TypeAliasTemplateDecl::Create(SemaRef.Context, Owner, D->getLocation(),
                                    D->getDeclName(), InstParams, AliasInst);
  AliasInst->setDescribedAliasTemplate(Inst);
  if (PrevAliasTemplate)
    Inst->setPreviousDecl(PrevAliasTemplate);
  Inst->setAccess(D->getAccess());

  if (!PrevAliasTemplate)
    Inst->setInstantiatedFromMemberTemplate(D);

  Owner->addDecl(Inst);

  return Inst;
}

Decl *TemplateDeclInstantiator::VisitBindingDecl(BindingDecl *D) {
  auto *NewBD = BindingDecl::Create(SemaRef.Context, Owner, D->getLocation(),
                                    D->getIdentifier());
  NewBD->setReferenced(D->isReferenced());
  SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewBD);
  return NewBD;
}

Decl *TemplateDeclInstantiator::VisitDecompositionDecl(DecompositionDecl *D) {
  // Transform the bindings first.
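  // For illustration: in a dependent context such as
  //   template<typename T> void g(T t) { auto [a, b] = t; }
  // each BindingDecl ('a', 'b') is instantiated before the decomposition
  // variable that holds them.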
  SmallVector<BindingDecl*, 16> NewBindings;
  for (auto *OldBD : D->bindings())
    NewBindings.push_back(cast<BindingDecl>(VisitBindingDecl(OldBD)));
  ArrayRef<BindingDecl*> NewBindingArray = NewBindings;

  auto *NewDD = cast_or_null<DecompositionDecl>(
      VisitVarDecl(D, /*InstantiatingVarTemplate=*/false, &NewBindingArray));

  if (!NewDD || NewDD->isInvalidDecl())
    for (auto *NewBD : NewBindings)
      NewBD->setInvalidDecl();

  return NewDD;
}

Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D) {
  return VisitVarDecl(D, /*InstantiatingVarTemplate=*/false);
}

Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D,
                                             bool InstantiatingVarTemplate,
                                             ArrayRef<BindingDecl*> *Bindings) {

  // Do substitution on the type of the declaration
  TypeSourceInfo *DI = SemaRef.SubstType(
      D->getTypeSourceInfo(), TemplateArgs, D->getTypeSpecStartLoc(),
      D->getDeclName(), /*AllowDeducedTST*/true);
  if (!DI)
    return nullptr;

  if (DI->getType()->isFunctionType()) {
    SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function)
        << D->isStaticDataMember() << DI->getType();
    return nullptr;
  }

  DeclContext *DC = Owner;
  if (D->isLocalExternDecl())
    SemaRef.adjustContextForLocalExternDecl(DC);

  // Build the instantiated declaration.
  VarDecl *Var;
  if (Bindings)
    Var = DecompositionDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(),
                                    D->getLocation(), DI->getType(), DI,
                                    D->getStorageClass(), *Bindings);
  else
    Var = VarDecl::Create(SemaRef.Context, DC, D->getInnerLocStart(),
                          D->getLocation(), D->getIdentifier(), DI->getType(),
                          DI, D->getStorageClass());

  // In ARC, infer 'retaining' for variables of retainable type.
  if (SemaRef.getLangOpts().ObjCAutoRefCount &&
      SemaRef.inferObjCARCLifetime(Var))
    Var->setInvalidDecl();

  if (SemaRef.getLangOpts().OpenCL)
    SemaRef.deduceOpenCLAddressSpace(Var);

  // Substitute the nested name specifier, if any.
  if (SubstQualifier(D, Var))
    return nullptr;

  SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner,
                                     StartingScope, InstantiatingVarTemplate);
  if (D->isNRVOVariable() && !Var->isInvalidDecl()) {
    QualType RT;
    if (auto *F = dyn_cast<FunctionDecl>(DC))
      RT = F->getReturnType();
    else if (isa<BlockDecl>(DC))
      RT = cast<FunctionType>(SemaRef.getCurBlock()->FunctionType)
               ->getReturnType();
    else
      llvm_unreachable("Unknown context type");

    // This is the last chance we have of checking copy elision eligibility
    // for functions in dependent contexts. The sema actions for building
    // the return statement during template instantiation will have no effect
    // regarding copy elision, since NRVO propagation runs on the scope exit
    // actions, and these are not run on instantiation.
    // This might run through some VarDecls which were returned from non-taken
    // 'if constexpr' branches, and these will end up being constructed on the
    // return slot even if they will never be returned, as a sort of accidental
    // 'optimization'. Notably, functions with 'auto' return types won't have it
    // deduced by this point. Coupled with the limitation described
    // previously, this makes it very hard to support copy elision for these.
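    // For illustration (hypothetical code):
    //   template<typename T> X f() {
    //     X x;
    //     if constexpr (sizeof(T) > 4) return X();
    //     return x;
    //   }
    // 'x' may still be constructed in the return slot even in instantiations
    // that take the 'if constexpr' branch and never return it.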
Sema::NamedReturnInfo Info = SemaRef.getNamedReturnInfo(Var); bool NRVO = SemaRef.getCopyElisionCandidate(Info, RT) != nullptr; Var->setNRVOVariable(NRVO); } Var->setImplicit(D->isImplicit()); if (Var->isStaticLocal()) SemaRef.CheckStaticLocalForDllExport(Var); return Var; } Decl *TemplateDeclInstantiator::VisitAccessSpecDecl(AccessSpecDecl *D) { AccessSpecDecl* AD = AccessSpecDecl::Create(SemaRef.Context, D->getAccess(), Owner, D->getAccessSpecifierLoc(), D->getColonLoc()); Owner->addHiddenDecl(AD); return AD; } Decl *TemplateDeclInstantiator::VisitFieldDecl(FieldDecl *D) { bool Invalid = false; TypeSourceInfo *DI = D->getTypeSourceInfo(); if (DI->getType()->isInstantiationDependentType() || DI->getType()->isVariablyModifiedType()) { DI = SemaRef.SubstType(DI, TemplateArgs, D->getLocation(), D->getDeclName()); if (!DI) { DI = D->getTypeSourceInfo(); Invalid = true; } else if (DI->getType()->isFunctionType()) { // C++ [temp.arg.type]p3: // If a declaration acquires a function type through a type // dependent on a template-parameter and this causes a // declaration that does not use the syntactic form of a // function declarator to have function type, the program is // ill-formed. SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function) << DI->getType(); Invalid = true; } } else { SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType()); } Expr *BitWidth = D->getBitWidth(); if (Invalid) BitWidth = nullptr; else if (BitWidth) { // The bit-width expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult InstantiatedBitWidth = SemaRef.SubstExpr(BitWidth, TemplateArgs); if (InstantiatedBitWidth.isInvalid()) { Invalid = true; BitWidth = nullptr; } else BitWidth = InstantiatedBitWidth.getAs(); } FieldDecl *Field = SemaRef.CheckFieldDecl(D->getDeclName(), DI->getType(), DI, cast(Owner), D->getLocation(), D->isMutable(), BitWidth, D->getInClassInitStyle(), D->getInnerLocStart(), D->getAccess(), nullptr); if (!Field) { cast(Owner)->setInvalidDecl(); return nullptr; } SemaRef.InstantiateAttrs(TemplateArgs, D, Field, LateAttrs, StartingScope); if (Field->hasAttrs()) SemaRef.CheckAlignasUnderalignment(Field); if (Invalid) Field->setInvalidDecl(); if (!Field->getDeclName()) { // Keep track of where this decl came from. 
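    // For illustration: unnamed fields come from anonymous members, e.g.
    //   template<typename T> struct S { union { T a; int b; }; };
    // where the anonymous union is itself a nameless FieldDecl.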
SemaRef.Context.setInstantiatedFromUnnamedFieldDecl(Field, D); } if (CXXRecordDecl *Parent= dyn_cast(Field->getDeclContext())) { if (Parent->isAnonymousStructOrUnion() && Parent->getRedeclContext()->isFunctionOrMethod()) SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Field); } Field->setImplicit(D->isImplicit()); Field->setAccess(D->getAccess()); Owner->addDecl(Field); return Field; } Decl *TemplateDeclInstantiator::VisitMSPropertyDecl(MSPropertyDecl *D) { bool Invalid = false; TypeSourceInfo *DI = D->getTypeSourceInfo(); if (DI->getType()->isVariablyModifiedType()) { SemaRef.Diag(D->getLocation(), diag::err_property_is_variably_modified) << D; Invalid = true; } else if (DI->getType()->isInstantiationDependentType()) { DI = SemaRef.SubstType(DI, TemplateArgs, D->getLocation(), D->getDeclName()); if (!DI) { DI = D->getTypeSourceInfo(); Invalid = true; } else if (DI->getType()->isFunctionType()) { // C++ [temp.arg.type]p3: // If a declaration acquires a function type through a type // dependent on a template-parameter and this causes a // declaration that does not use the syntactic form of a // function declarator to have function type, the program is // ill-formed. SemaRef.Diag(D->getLocation(), diag::err_field_instantiates_to_function) << DI->getType(); Invalid = true; } } else { SemaRef.MarkDeclarationsReferencedInType(D->getLocation(), DI->getType()); } MSPropertyDecl *Property = MSPropertyDecl::Create( SemaRef.Context, Owner, D->getLocation(), D->getDeclName(), DI->getType(), DI, D->getBeginLoc(), D->getGetterId(), D->getSetterId()); SemaRef.InstantiateAttrs(TemplateArgs, D, Property, LateAttrs, StartingScope); if (Invalid) Property->setInvalidDecl(); Property->setAccess(D->getAccess()); Owner->addDecl(Property); return Property; } Decl *TemplateDeclInstantiator::VisitIndirectFieldDecl(IndirectFieldDecl *D) { NamedDecl **NamedChain = new (SemaRef.Context)NamedDecl*[D->getChainingSize()]; int i = 0; for (auto *PI : D->chain()) { NamedDecl *Next = SemaRef.FindInstantiatedDecl(D->getLocation(), PI, TemplateArgs); if (!Next) return nullptr; NamedChain[i++] = Next; } QualType T = cast(NamedChain[i-1])->getType(); IndirectFieldDecl *IndirectField = IndirectFieldDecl::Create( SemaRef.Context, Owner, D->getLocation(), D->getIdentifier(), T, {NamedChain, D->getChainingSize()}); for (const auto *Attr : D->attrs()) IndirectField->addAttr(Attr->clone(SemaRef.Context)); IndirectField->setImplicit(D->isImplicit()); IndirectField->setAccess(D->getAccess()); Owner->addDecl(IndirectField); return IndirectField; } Decl *TemplateDeclInstantiator::VisitFriendDecl(FriendDecl *D) { // Handle friend type expressions by simply substituting template // parameters into the pattern type and checking the result. if (TypeSourceInfo *Ty = D->getFriendType()) { TypeSourceInfo *InstTy; // If this is an unsupported friend, don't bother substituting template // arguments into it. The actual type referred to won't be used by any // parts of Clang, and may not be valid for instantiating. Just use the // same info for the instantiated friend. 
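    // For illustration: a friend with a dependent scope, e.g.
    //   template<typename T> struct A { friend class T::B; };
    // is "unsupported"; Clang keeps the type as written and turns off access
    // control rather than substituting into it.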
if (D->isUnsupportedFriend()) { InstTy = Ty; } else { InstTy = SemaRef.SubstType(Ty, TemplateArgs, D->getLocation(), DeclarationName()); } if (!InstTy) return nullptr; FriendDecl *FD = SemaRef.CheckFriendTypeDecl(D->getBeginLoc(), D->getFriendLoc(), InstTy); if (!FD) return nullptr; FD->setAccess(AS_public); FD->setUnsupportedFriend(D->isUnsupportedFriend()); Owner->addDecl(FD); return FD; } NamedDecl *ND = D->getFriendDecl(); assert(ND && "friend decl must be a decl or a type!"); // All of the Visit implementations for the various potential friend // declarations have to be carefully written to work for friend // objects, with the most important detail being that the target // decl should almost certainly not be placed in Owner. Decl *NewND = Visit(ND); if (!NewND) return nullptr; FriendDecl *FD = FriendDecl::Create(SemaRef.Context, Owner, D->getLocation(), cast(NewND), D->getFriendLoc()); FD->setAccess(AS_public); FD->setUnsupportedFriend(D->isUnsupportedFriend()); Owner->addDecl(FD); return FD; } Decl *TemplateDeclInstantiator::VisitStaticAssertDecl(StaticAssertDecl *D) { Expr *AssertExpr = D->getAssertExpr(); // The expression in a static assertion is a constant expression. EnterExpressionEvaluationContext Unevaluated( SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult InstantiatedAssertExpr = SemaRef.SubstExpr(AssertExpr, TemplateArgs); if (InstantiatedAssertExpr.isInvalid()) return nullptr; return SemaRef.BuildStaticAssertDeclaration(D->getLocation(), InstantiatedAssertExpr.get(), D->getMessage(), D->getRParenLoc(), D->isFailed()); } Decl *TemplateDeclInstantiator::VisitEnumDecl(EnumDecl *D) { EnumDecl *PrevDecl = nullptr; if (EnumDecl *PatternPrev = getPreviousDeclForInstantiation(D)) { NamedDecl *Prev = SemaRef.FindInstantiatedDecl(D->getLocation(), PatternPrev, TemplateArgs); if (!Prev) return nullptr; PrevDecl = cast(Prev); } EnumDecl *Enum = EnumDecl::Create(SemaRef.Context, Owner, D->getBeginLoc(), D->getLocation(), D->getIdentifier(), PrevDecl, D->isScoped(), D->isScopedUsingClassTag(), D->isFixed()); if (D->isFixed()) { if (TypeSourceInfo *TI = D->getIntegerTypeSourceInfo()) { // If we have type source information for the underlying type, it means it // has been explicitly set by the user. Perform substitution on it before // moving on. SourceLocation UnderlyingLoc = TI->getTypeLoc().getBeginLoc(); TypeSourceInfo *NewTI = SemaRef.SubstType(TI, TemplateArgs, UnderlyingLoc, DeclarationName()); if (!NewTI || SemaRef.CheckEnumUnderlyingType(NewTI)) Enum->setIntegerType(SemaRef.Context.IntTy); else Enum->setIntegerTypeSourceInfo(NewTI); } else { assert(!D->getIntegerType()->isDependentType() && "Dependent type without type source info"); Enum->setIntegerType(D->getIntegerType()); } } SemaRef.InstantiateAttrs(TemplateArgs, D, Enum); Enum->setInstantiationOfMemberEnum(D, TSK_ImplicitInstantiation); Enum->setAccess(D->getAccess()); // Forward the mangling number from the template to the instantiated decl. SemaRef.Context.setManglingNumber(Enum, SemaRef.Context.getManglingNumber(D)); // See if the old tag was defined along with a declarator. // If it did, mark the new tag as being associated with that declarator. if (DeclaratorDecl *DD = SemaRef.Context.getDeclaratorForUnnamedTagDecl(D)) SemaRef.Context.addDeclaratorForUnnamedTagDecl(Enum, DD); // See if the old tag was defined along with a typedef. // If it did, mark the new tag as being associated with that typedef. 
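  // For illustration:
  //   template<typename T> struct S { typedef enum { A, B } E; };
  // here the unnamed enum's name for linkage purposes is the typedef 'E',
  // and that association must be rebuilt on the instantiated tag.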
if (TypedefNameDecl *TND = SemaRef.Context.getTypedefNameForUnnamedTagDecl(D)) SemaRef.Context.addTypedefNameForUnnamedTagDecl(Enum, TND); if (SubstQualifier(D, Enum)) return nullptr; Owner->addDecl(Enum); EnumDecl *Def = D->getDefinition(); if (Def && Def != D) { // If this is an out-of-line definition of an enum member template, check // that the underlying types match in the instantiation of both // declarations. if (TypeSourceInfo *TI = Def->getIntegerTypeSourceInfo()) { SourceLocation UnderlyingLoc = TI->getTypeLoc().getBeginLoc(); QualType DefnUnderlying = SemaRef.SubstType(TI->getType(), TemplateArgs, UnderlyingLoc, DeclarationName()); SemaRef.CheckEnumRedeclaration(Def->getLocation(), Def->isScoped(), DefnUnderlying, /*IsFixed=*/true, Enum); } } // C++11 [temp.inst]p1: The implicit instantiation of a class template // specialization causes the implicit instantiation of the declarations, but // not the definitions of scoped member enumerations. // // DR1484 clarifies that enumeration definitions inside of a template // declaration aren't considered entities that can be separately instantiated // from the rest of the entity they are declared inside of. if (isDeclWithinFunction(D) ? D == Def : Def && !Enum->isScoped()) { SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Enum); InstantiateEnumDefinition(Enum, Def); } return Enum; } void TemplateDeclInstantiator::InstantiateEnumDefinition( EnumDecl *Enum, EnumDecl *Pattern) { Enum->startDefinition(); // Update the location to refer to the definition. Enum->setLocation(Pattern->getLocation()); SmallVector Enumerators; EnumConstantDecl *LastEnumConst = nullptr; for (auto *EC : Pattern->enumerators()) { // The specified value for the enumerator. ExprResult Value((Expr *)nullptr); if (Expr *UninstValue = EC->getInitExpr()) { // The enumerator's value expression is a constant expression. EnterExpressionEvaluationContext Unevaluated( SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); Value = SemaRef.SubstExpr(UninstValue, TemplateArgs); } // Drop the initial value and continue. bool isInvalid = false; if (Value.isInvalid()) { Value = nullptr; isInvalid = true; } EnumConstantDecl *EnumConst = SemaRef.CheckEnumConstant(Enum, LastEnumConst, EC->getLocation(), EC->getIdentifier(), Value.get()); if (isInvalid) { if (EnumConst) EnumConst->setInvalidDecl(); Enum->setInvalidDecl(); } if (EnumConst) { SemaRef.InstantiateAttrs(TemplateArgs, EC, EnumConst); EnumConst->setAccess(Enum->getAccess()); Enum->addDecl(EnumConst); Enumerators.push_back(EnumConst); LastEnumConst = EnumConst; if (Pattern->getDeclContext()->isFunctionOrMethod() && !Enum->isScoped()) { // If the enumeration is within a function or method, record the enum // constant as a local. SemaRef.CurrentInstantiationScope->InstantiatedLocal(EC, EnumConst); } } } SemaRef.ActOnEnumBody(Enum->getLocation(), Enum->getBraceRange(), Enum, Enumerators, nullptr, ParsedAttributesView()); } Decl *TemplateDeclInstantiator::VisitEnumConstantDecl(EnumConstantDecl *D) { llvm_unreachable("EnumConstantDecls can only occur within EnumDecls."); } Decl * TemplateDeclInstantiator::VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D) { llvm_unreachable("BuiltinTemplateDecls cannot be instantiated."); } Decl *TemplateDeclInstantiator::VisitClassTemplateDecl(ClassTemplateDecl *D) { bool isFriend = (D->getFriendObjectKind() != Decl::FOK_None); // Create a local instantiation scope for this class template, which // will contain the instantiations of the template parameters. 
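  // For illustration, this visitor handles member class templates such as
  //   template<typename T> struct Outer {
  //     template<typename U> struct Inner {};
  //   };
  // instantiating Outer<int> substitutes int for T in Inner's declaration
  // while leaving U as a live template parameter.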
LocalInstantiationScope Scope(SemaRef); TemplateParameterList *TempParams = D->getTemplateParameters(); TemplateParameterList *InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; CXXRecordDecl *Pattern = D->getTemplatedDecl(); // Instantiate the qualifier. We have to do this first in case // we're a friend declaration, because if we are then we need to put // the new declaration in the appropriate context. NestedNameSpecifierLoc QualifierLoc = Pattern->getQualifierLoc(); if (QualifierLoc) { QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, TemplateArgs); if (!QualifierLoc) return nullptr; } CXXRecordDecl *PrevDecl = nullptr; ClassTemplateDecl *PrevClassTemplate = nullptr; if (!isFriend && getPreviousDeclForInstantiation(Pattern)) { DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName()); if (!Found.empty()) { PrevClassTemplate = dyn_cast(Found.front()); if (PrevClassTemplate) PrevDecl = PrevClassTemplate->getTemplatedDecl(); } } // If this isn't a friend, then it's a member template, in which // case we just want to build the instantiation in the // specialization. If it is a friend, we want to build it in // the appropriate context. DeclContext *DC = Owner; if (isFriend) { if (QualifierLoc) { CXXScopeSpec SS; SS.Adopt(QualifierLoc); DC = SemaRef.computeDeclContext(SS); if (!DC) return nullptr; } else { DC = SemaRef.FindInstantiatedContext(Pattern->getLocation(), Pattern->getDeclContext(), TemplateArgs); } // Look for a previous declaration of the template in the owning // context. LookupResult R(SemaRef, Pattern->getDeclName(), Pattern->getLocation(), Sema::LookupOrdinaryName, SemaRef.forRedeclarationInCurContext()); SemaRef.LookupQualifiedName(R, DC); if (R.isSingleResult()) { PrevClassTemplate = R.getAsSingle(); if (PrevClassTemplate) PrevDecl = PrevClassTemplate->getTemplatedDecl(); } if (!PrevClassTemplate && QualifierLoc) { SemaRef.Diag(Pattern->getLocation(), diag::err_not_tag_in_scope) << D->getTemplatedDecl()->getTagKind() << Pattern->getDeclName() << DC << QualifierLoc.getSourceRange(); return nullptr; } if (PrevClassTemplate) { TemplateParameterList *PrevParams = PrevClassTemplate->getMostRecentDecl()->getTemplateParameters(); // Make sure the parameter lists match. if (!SemaRef.TemplateParameterListsAreEqual(InstParams, PrevParams, true, Sema::TPL_TemplateMatch)) return nullptr; // Do some additional validation, then merge default arguments // from the existing declarations. if (SemaRef.CheckTemplateParameterList(InstParams, PrevParams, Sema::TPC_ClassTemplate)) return nullptr; } } CXXRecordDecl *RecordInst = CXXRecordDecl::Create( SemaRef.Context, Pattern->getTagKind(), DC, Pattern->getBeginLoc(), Pattern->getLocation(), Pattern->getIdentifier(), PrevDecl, /*DelayTypeCreation=*/true); if (QualifierLoc) RecordInst->setQualifierInfo(QualifierLoc); SemaRef.InstantiateAttrsForDecl(TemplateArgs, Pattern, RecordInst, LateAttrs, StartingScope); ClassTemplateDecl *Inst = ClassTemplateDecl::Create(SemaRef.Context, DC, D->getLocation(), D->getIdentifier(), InstParams, RecordInst); assert(!(isFriend && Owner->isDependentContext())); Inst->setPreviousDecl(PrevClassTemplate); RecordInst->setDescribedClassTemplate(Inst); if (isFriend) { if (PrevClassTemplate) Inst->setAccess(PrevClassTemplate->getAccess()); else Inst->setAccess(D->getAccess()); Inst->setObjectOfFriendDecl(); // TODO: do we want to track the instantiation progeny of this // friend target decl? 
} else { Inst->setAccess(D->getAccess()); if (!PrevClassTemplate) Inst->setInstantiatedFromMemberTemplate(D); } // Trigger creation of the type for the instantiation. SemaRef.Context.getInjectedClassNameType(RecordInst, Inst->getInjectedClassNameSpecialization()); // Finish handling of friends. if (isFriend) { DC->makeDeclVisibleInContext(Inst); Inst->setLexicalDeclContext(Owner); RecordInst->setLexicalDeclContext(Owner); return Inst; } if (D->isOutOfLine()) { Inst->setLexicalDeclContext(D->getLexicalDeclContext()); RecordInst->setLexicalDeclContext(D->getLexicalDeclContext()); } Owner->addDecl(Inst); if (!PrevClassTemplate) { // Queue up any out-of-line partial specializations of this member // class template; the client will force their instantiation once // the enclosing class has been instantiated. SmallVector PartialSpecs; D->getPartialSpecializations(PartialSpecs); for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I) if (PartialSpecs[I]->getFirstDecl()->isOutOfLine()) OutOfLinePartialSpecs.push_back(std::make_pair(Inst, PartialSpecs[I])); } return Inst; } Decl * TemplateDeclInstantiator::VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D) { ClassTemplateDecl *ClassTemplate = D->getSpecializedTemplate(); // Lookup the already-instantiated declaration in the instantiation // of the class template and return that. DeclContext::lookup_result Found = Owner->lookup(ClassTemplate->getDeclName()); if (Found.empty()) return nullptr; ClassTemplateDecl *InstClassTemplate = dyn_cast(Found.front()); if (!InstClassTemplate) return nullptr; if (ClassTemplatePartialSpecializationDecl *Result = InstClassTemplate->findPartialSpecInstantiatedFromMember(D)) return Result; return InstantiateClassTemplatePartialSpecialization(InstClassTemplate, D); } Decl *TemplateDeclInstantiator::VisitVarTemplateDecl(VarTemplateDecl *D) { assert(D->getTemplatedDecl()->isStaticDataMember() && "Only static data member templates are allowed."); // Create a local instantiation scope for this variable template, which // will contain the instantiations of the template parameters. LocalInstantiationScope Scope(SemaRef); TemplateParameterList *TempParams = D->getTemplateParameters(); TemplateParameterList *InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; VarDecl *Pattern = D->getTemplatedDecl(); VarTemplateDecl *PrevVarTemplate = nullptr; if (getPreviousDeclForInstantiation(Pattern)) { DeclContext::lookup_result Found = Owner->lookup(Pattern->getDeclName()); if (!Found.empty()) PrevVarTemplate = dyn_cast(Found.front()); } VarDecl *VarInst = cast_or_null(VisitVarDecl(Pattern, /*InstantiatingVarTemplate=*/true)); if (!VarInst) return nullptr; DeclContext *DC = Owner; VarTemplateDecl *Inst = VarTemplateDecl::Create( SemaRef.Context, DC, D->getLocation(), D->getIdentifier(), InstParams, VarInst); VarInst->setDescribedVarTemplate(Inst); Inst->setPreviousDecl(PrevVarTemplate); Inst->setAccess(D->getAccess()); if (!PrevVarTemplate) Inst->setInstantiatedFromMemberTemplate(D); if (D->isOutOfLine()) { Inst->setLexicalDeclContext(D->getLexicalDeclContext()); VarInst->setLexicalDeclContext(D->getLexicalDeclContext()); } Owner->addDecl(Inst); if (!PrevVarTemplate) { // Queue up any out-of-line partial specializations of this member // variable template; the client will force their instantiation once // the enclosing class has been instantiated. 
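    // For illustration (hypothetical):
    //   template<typename T> struct S {
    //     template<typename U> static int v;
    //   };
    //   template<typename T> template<typename U> int S<T>::v<U*> = 0;
    // the out-of-line partial specialization of 'v' is queued here and
    // instantiated once the enclosing class has been instantiated.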
SmallVector PartialSpecs; D->getPartialSpecializations(PartialSpecs); for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I) if (PartialSpecs[I]->getFirstDecl()->isOutOfLine()) OutOfLineVarPartialSpecs.push_back( std::make_pair(Inst, PartialSpecs[I])); } return Inst; } Decl *TemplateDeclInstantiator::VisitVarTemplatePartialSpecializationDecl( VarTemplatePartialSpecializationDecl *D) { assert(D->isStaticDataMember() && "Only static data member templates are allowed."); VarTemplateDecl *VarTemplate = D->getSpecializedTemplate(); // Lookup the already-instantiated declaration and return that. DeclContext::lookup_result Found = Owner->lookup(VarTemplate->getDeclName()); assert(!Found.empty() && "Instantiation found nothing?"); VarTemplateDecl *InstVarTemplate = dyn_cast(Found.front()); assert(InstVarTemplate && "Instantiation did not find a variable template?"); if (VarTemplatePartialSpecializationDecl *Result = InstVarTemplate->findPartialSpecInstantiatedFromMember(D)) return Result; return InstantiateVarTemplatePartialSpecialization(InstVarTemplate, D); } Decl * TemplateDeclInstantiator::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) { // Create a local instantiation scope for this function template, which // will contain the instantiations of the template parameters and then get // merged with the local instantiation scope for the function template // itself. LocalInstantiationScope Scope(SemaRef); TemplateParameterList *TempParams = D->getTemplateParameters(); TemplateParameterList *InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; FunctionDecl *Instantiated = nullptr; if (CXXMethodDecl *DMethod = dyn_cast(D->getTemplatedDecl())) Instantiated = cast_or_null(VisitCXXMethodDecl(DMethod, InstParams)); else Instantiated = cast_or_null(VisitFunctionDecl( D->getTemplatedDecl(), InstParams)); if (!Instantiated) return nullptr; // Link the instantiated function template declaration to the function // template from which it was instantiated. FunctionTemplateDecl *InstTemplate = Instantiated->getDescribedFunctionTemplate(); InstTemplate->setAccess(D->getAccess()); assert(InstTemplate && "VisitFunctionDecl/CXXMethodDecl didn't create a template!"); bool isFriend = (InstTemplate->getFriendObjectKind() != Decl::FOK_None); // Link the instantiation back to the pattern *unless* this is a // non-definition friend declaration. if (!InstTemplate->getInstantiatedFromMemberTemplate() && !(isFriend && !D->getTemplatedDecl()->isThisDeclarationADefinition())) InstTemplate->setInstantiatedFromMemberTemplate(D); // Make declarations visible in the appropriate context. 
if (!isFriend) { Owner->addDecl(InstTemplate); } else if (InstTemplate->getDeclContext()->isRecord() && !getPreviousDeclForInstantiation(D)) { SemaRef.CheckFriendAccess(InstTemplate); } return InstTemplate; } Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) { CXXRecordDecl *PrevDecl = nullptr; if (CXXRecordDecl *PatternPrev = getPreviousDeclForInstantiation(D)) { NamedDecl *Prev = SemaRef.FindInstantiatedDecl(D->getLocation(), PatternPrev, TemplateArgs); if (!Prev) return nullptr; PrevDecl = cast(Prev); } CXXRecordDecl *Record = nullptr; bool IsInjectedClassName = D->isInjectedClassName(); if (D->isLambda()) Record = CXXRecordDecl::CreateLambda( SemaRef.Context, Owner, D->getLambdaTypeInfo(), D->getLocation(), D->isDependentLambda(), D->isGenericLambda(), D->getLambdaCaptureDefault()); else Record = CXXRecordDecl::Create(SemaRef.Context, D->getTagKind(), Owner, D->getBeginLoc(), D->getLocation(), D->getIdentifier(), PrevDecl, /*DelayTypeCreation=*/IsInjectedClassName); // Link the type of the injected-class-name to that of the outer class. if (IsInjectedClassName) (void)SemaRef.Context.getTypeDeclType(Record, cast(Owner)); // Substitute the nested name specifier, if any. if (SubstQualifier(D, Record)) return nullptr; SemaRef.InstantiateAttrsForDecl(TemplateArgs, D, Record, LateAttrs, StartingScope); Record->setImplicit(D->isImplicit()); // FIXME: Check against AS_none is an ugly hack to work around the issue that // the tag decls introduced by friend class declarations don't have an access // specifier. Remove once this area of the code gets sorted out. if (D->getAccess() != AS_none) Record->setAccess(D->getAccess()); if (!IsInjectedClassName) Record->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation); // If the original function was part of a friend declaration, // inherit its namespace state. if (D->getFriendObjectKind()) Record->setObjectOfFriendDecl(); // Make sure that anonymous structs and unions are recorded. if (D->isAnonymousStructOrUnion()) Record->setAnonymousStructOrUnion(true); if (D->isLocalClass()) SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Record); // Forward the mangling number from the template to the instantiated decl. SemaRef.Context.setManglingNumber(Record, SemaRef.Context.getManglingNumber(D)); // See if the old tag was defined along with a declarator. // If it did, mark the new tag as being associated with that declarator. if (DeclaratorDecl *DD = SemaRef.Context.getDeclaratorForUnnamedTagDecl(D)) SemaRef.Context.addDeclaratorForUnnamedTagDecl(Record, DD); // See if the old tag was defined along with a typedef. // If it did, mark the new tag as being associated with that typedef. if (TypedefNameDecl *TND = SemaRef.Context.getTypedefNameForUnnamedTagDecl(D)) SemaRef.Context.addTypedefNameForUnnamedTagDecl(Record, TND); Owner->addDecl(Record); // DR1484 clarifies that the members of a local class are instantiated as part // of the instantiation of their enclosing entity. if (D->isCompleteDefinition() && D->isLocalClass()) { Sema::LocalEagerInstantiationScope LocalInstantiations(SemaRef); SemaRef.InstantiateClass(D->getLocation(), Record, D, TemplateArgs, TSK_ImplicitInstantiation, /*Complain=*/true); // For nested local classes, we will instantiate the members when we // reach the end of the outermost (non-nested) local class. 
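    // For illustration:
    //   template<typename T> void f() {
    //     struct Local { struct Nested { void g() {} }; };
    //   }
    // Nested's members are instantiated once the outermost local class
    // 'Local' has been fully instantiated.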
if (!D->isCXXClassMember()) SemaRef.InstantiateClassMembers(D->getLocation(), Record, TemplateArgs, TSK_ImplicitInstantiation); // This class may have local implicit instantiations that need to be // performed within this scope. LocalInstantiations.perform(); } SemaRef.DiagnoseUnusedNestedTypedefs(Record); if (IsInjectedClassName) assert(Record->isInjectedClassName() && "Broken injected-class-name"); return Record; } /// Adjust the given function type for an instantiation of the /// given declaration, to cope with modifications to the function's type that /// aren't reflected in the type-source information. /// /// \param D The declaration we're instantiating. /// \param TInfo The already-instantiated type. static QualType adjustFunctionTypeForInstantiation(ASTContext &Context, FunctionDecl *D, TypeSourceInfo *TInfo) { const FunctionProtoType *OrigFunc = D->getType()->castAs(); const FunctionProtoType *NewFunc = TInfo->getType()->castAs(); if (OrigFunc->getExtInfo() == NewFunc->getExtInfo()) return TInfo->getType(); FunctionProtoType::ExtProtoInfo NewEPI = NewFunc->getExtProtoInfo(); NewEPI.ExtInfo = OrigFunc->getExtInfo(); return Context.getFunctionType(NewFunc->getReturnType(), NewFunc->getParamTypes(), NewEPI); } /// Normal class members are of more specific types and therefore /// don't make it here. This function serves three purposes: /// 1) instantiating function templates /// 2) substituting friend declarations /// 3) substituting deduction guide declarations for nested class templates Decl *TemplateDeclInstantiator::VisitFunctionDecl( FunctionDecl *D, TemplateParameterList *TemplateParams, RewriteKind FunctionRewriteKind) { // Check whether there is already a function template specialization for // this declaration. FunctionTemplateDecl *FunctionTemplate = D->getDescribedFunctionTemplate(); if (FunctionTemplate && !TemplateParams) { ArrayRef Innermost = TemplateArgs.getInnermost(); void *InsertPos = nullptr; FunctionDecl *SpecFunc = FunctionTemplate->findSpecialization(Innermost, InsertPos); // If we already have a function template specialization, return it. if (SpecFunc) return SpecFunc; } bool isFriend; if (FunctionTemplate) isFriend = (FunctionTemplate->getFriendObjectKind() != Decl::FOK_None); else isFriend = (D->getFriendObjectKind() != Decl::FOK_None); bool MergeWithParentScope = (TemplateParams != nullptr) || Owner->isFunctionOrMethod() || !(isa(Owner) && cast(Owner)->isDefinedOutsideFunctionOrMethod()); LocalInstantiationScope Scope(SemaRef, MergeWithParentScope); ExplicitSpecifier InstantiatedExplicitSpecifier; if (auto *DGuide = dyn_cast(D)) { InstantiatedExplicitSpecifier = instantiateExplicitSpecifier( SemaRef, TemplateArgs, DGuide->getExplicitSpecifier(), DGuide); if (InstantiatedExplicitSpecifier.isInvalid()) return nullptr; } SmallVector Params; TypeSourceInfo *TInfo = SubstFunctionType(D, Params); if (!TInfo) return nullptr; QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo); if (TemplateParams && TemplateParams->size()) { auto *LastParam = dyn_cast(TemplateParams->asArray().back()); if (LastParam && LastParam->isImplicit() && LastParam->hasTypeConstraint()) { // In abbreviated templates, the type-constraints of invented template // type parameters are instantiated with the function type, invalidating // the TemplateParameterList which relied on the template type parameter // not having a type constraint. Recreate the TemplateParameterList with // the updated parameter list. 
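      // For illustration (assuming a concept 'C'): a friend like
      //   template<typename T> struct S { friend void f(C auto, T); };
      // invents a constrained template type parameter for 'C auto', so the
      // constraint must be re-instantiated along with the function type.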
TemplateParams = TemplateParameterList::Create( SemaRef.Context, TemplateParams->getTemplateLoc(), TemplateParams->getLAngleLoc(), TemplateParams->asArray(), TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause()); } } NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc(); if (QualifierLoc) { QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, TemplateArgs); if (!QualifierLoc) return nullptr; } // FIXME: Concepts: Do not substitute into constraint expressions Expr *TrailingRequiresClause = D->getTrailingRequiresClause(); if (TrailingRequiresClause) { EnterExpressionEvaluationContext ConstantEvaluated( SemaRef, Sema::ExpressionEvaluationContext::Unevaluated); ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause, TemplateArgs); if (SubstRC.isInvalid()) return nullptr; TrailingRequiresClause = SubstRC.get(); if (!SemaRef.CheckConstraintExpression(TrailingRequiresClause)) return nullptr; } // If we're instantiating a local function declaration, put the result // in the enclosing namespace; otherwise we need to find the instantiated // context. DeclContext *DC; if (D->isLocalExternDecl()) { DC = Owner; SemaRef.adjustContextForLocalExternDecl(DC); } else if (isFriend && QualifierLoc) { CXXScopeSpec SS; SS.Adopt(QualifierLoc); DC = SemaRef.computeDeclContext(SS); if (!DC) return nullptr; } else { DC = SemaRef.FindInstantiatedContext(D->getLocation(), D->getDeclContext(), TemplateArgs); } DeclarationNameInfo NameInfo = SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs); if (FunctionRewriteKind != RewriteKind::None) adjustForRewrite(FunctionRewriteKind, D, T, TInfo, NameInfo); FunctionDecl *Function; if (auto *DGuide = dyn_cast(D)) { Function = CXXDeductionGuideDecl::Create( SemaRef.Context, DC, D->getInnerLocStart(), InstantiatedExplicitSpecifier, NameInfo, T, TInfo, D->getSourceRange().getEnd()); if (DGuide->isCopyDeductionCandidate()) cast(Function)->setIsCopyDeductionCandidate(); Function->setAccess(D->getAccess()); } else { Function = FunctionDecl::Create( SemaRef.Context, DC, D->getInnerLocStart(), NameInfo, T, TInfo, D->getCanonicalDecl()->getStorageClass(), D->UsesFPIntrin(), D->isInlineSpecified(), D->hasWrittenPrototype(), D->getConstexprKind(), TrailingRequiresClause); Function->setRangeEnd(D->getSourceRange().getEnd()); } if (D->isInlined()) Function->setImplicitlyInline(); if (QualifierLoc) Function->setQualifierInfo(QualifierLoc); if (D->isLocalExternDecl()) Function->setLocalExternDecl(); DeclContext *LexicalDC = Owner; if (!isFriend && D->isOutOfLine() && !D->isLocalExternDecl()) { assert(D->getDeclContext()->isFileContext()); LexicalDC = D->getDeclContext(); } Function->setLexicalDeclContext(LexicalDC); // Attach the parameters for (unsigned P = 0; P < Params.size(); ++P) if (Params[P]) Params[P]->setOwningFunction(Function); Function->setParams(Params); if (TrailingRequiresClause) Function->setTrailingRequiresClause(TrailingRequiresClause); if (TemplateParams) { // Our resulting instantiation is actually a function template, since we // are substituting only the outer template parameters. For example, given // // template // struct X { // template friend void f(T, U); // }; // // X x; // // We are instantiating the friend function template "f" within X, // which means substituting int for T, but leaving "f" as a friend function // template. // Build the function template itself. 
FunctionTemplate = FunctionTemplateDecl::Create(SemaRef.Context, DC, Function->getLocation(), Function->getDeclName(), TemplateParams, Function); Function->setDescribedFunctionTemplate(FunctionTemplate); FunctionTemplate->setLexicalDeclContext(LexicalDC); if (isFriend && D->isThisDeclarationADefinition()) { FunctionTemplate->setInstantiatedFromMemberTemplate( D->getDescribedFunctionTemplate()); } } else if (FunctionTemplate) { // Record this function template specialization. ArrayRef Innermost = TemplateArgs.getInnermost(); Function->setFunctionTemplateSpecialization(FunctionTemplate, TemplateArgumentList::CreateCopy(SemaRef.Context, Innermost), /*InsertPos=*/nullptr); } else if (isFriend && D->isThisDeclarationADefinition()) { // Do not connect the friend to the template unless it's actually a // definition. We don't want non-template functions to be marked as being // template instantiations. Function->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation); } if (isFriend) { Function->setObjectOfFriendDecl(); if (FunctionTemplateDecl *FT = Function->getDescribedFunctionTemplate()) FT->setObjectOfFriendDecl(); } if (InitFunctionInstantiation(Function, D)) Function->setInvalidDecl(); bool IsExplicitSpecialization = false; LookupResult Previous( SemaRef, Function->getDeclName(), SourceLocation(), D->isLocalExternDecl() ? Sema::LookupRedeclarationWithLinkage : Sema::LookupOrdinaryName, D->isLocalExternDecl() ? Sema::ForExternalRedeclaration : SemaRef.forRedeclarationInCurContext()); if (DependentFunctionTemplateSpecializationInfo *Info = D->getDependentSpecializationInfo()) { assert(isFriend && "non-friend has dependent specialization info?"); // Instantiate the explicit template arguments. TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(), Info->getRAngleLoc()); if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs, ExplicitArgs)) return nullptr; // Map the candidate templates to their instantiations. for (unsigned I = 0, E = Info->getNumTemplates(); I != E; ++I) { Decl *Temp = SemaRef.FindInstantiatedDecl(D->getLocation(), Info->getTemplate(I), TemplateArgs); if (!Temp) return nullptr; Previous.addDecl(cast(Temp)); } if (SemaRef.CheckFunctionTemplateSpecialization(Function, &ExplicitArgs, Previous)) Function->setInvalidDecl(); IsExplicitSpecialization = true; } else if (const ASTTemplateArgumentListInfo *Info = D->getTemplateSpecializationArgsAsWritten()) { // The name of this function was written as a template-id. SemaRef.LookupQualifiedName(Previous, DC); // Instantiate the explicit template arguments. TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(), Info->getRAngleLoc()); if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs, ExplicitArgs)) return nullptr; if (SemaRef.CheckFunctionTemplateSpecialization(Function, &ExplicitArgs, Previous)) Function->setInvalidDecl(); IsExplicitSpecialization = true; } else if (TemplateParams || !FunctionTemplate) { // Look only into the namespace where the friend would be declared to // find a previous declaration. This is the innermost enclosing namespace, // as described in ActOnFriendFunctionDecl. SemaRef.LookupQualifiedName(Previous, DC->getRedeclContext()); // In C++, the previous declaration we find might be a tag type // (class or enum). In this case, the new declaration will hide the // tag type. Note that this does does not apply if we're declaring a // typedef (C++ [dcl.typedef]p4). 
if (Previous.isSingleTagDecl()) Previous.clear(); // Filter out previous declarations that don't match the scope. The only // effect this has is to remove declarations found in inline namespaces // for friend declarations with unqualified names. SemaRef.FilterLookupForScope(Previous, DC, /*Scope*/ nullptr, /*ConsiderLinkage*/ true, QualifierLoc.hasQualifier()); } SemaRef.CheckFunctionDeclaration(/*Scope*/ nullptr, Function, Previous, IsExplicitSpecialization); // Check the template parameter list against the previous declaration. The // goal here is to pick up default arguments added since the friend was // declared; we know the template parameter lists match, since otherwise // we would not have picked this template as the previous declaration. if (isFriend && TemplateParams && FunctionTemplate->getPreviousDecl()) { SemaRef.CheckTemplateParameterList( TemplateParams, FunctionTemplate->getPreviousDecl()->getTemplateParameters(), Function->isThisDeclarationADefinition() ? Sema::TPC_FriendFunctionTemplateDefinition : Sema::TPC_FriendFunctionTemplate); } // If we're introducing a friend definition after the first use, trigger // instantiation. // FIXME: If this is a friend function template definition, we should check // to see if any specializations have been used. if (isFriend && D->isThisDeclarationADefinition() && Function->isUsed(false)) { if (MemberSpecializationInfo *MSInfo = Function->getMemberSpecializationInfo()) { if (MSInfo->getPointOfInstantiation().isInvalid()) { SourceLocation Loc = D->getLocation(); // FIXME MSInfo->setPointOfInstantiation(Loc); SemaRef.PendingLocalImplicitInstantiations.push_back( std::make_pair(Function, Loc)); } } } if (D->isExplicitlyDefaulted()) { if (SubstDefaultedFunction(Function, D)) return nullptr; } if (D->isDeleted()) SemaRef.SetDeclDeleted(Function, D->getLocation()); NamedDecl *PrincipalDecl = (TemplateParams ? cast(FunctionTemplate) : Function); // If this declaration lives in a different context from its lexical context, // add it to the corresponding lookup table. if (isFriend || (Function->isLocalExternDecl() && !Function->getPreviousDecl())) DC->makeDeclVisibleInContext(PrincipalDecl); if (Function->isOverloadedOperator() && !DC->isRecord() && PrincipalDecl->isInIdentifierNamespace(Decl::IDNS_Ordinary)) PrincipalDecl->setNonMemberOperator(); return Function; } Decl *TemplateDeclInstantiator::VisitCXXMethodDecl( CXXMethodDecl *D, TemplateParameterList *TemplateParams, Optional ClassScopeSpecializationArgs, RewriteKind FunctionRewriteKind) { FunctionTemplateDecl *FunctionTemplate = D->getDescribedFunctionTemplate(); if (FunctionTemplate && !TemplateParams) { // We are creating a function template specialization from a function // template. Check whether there is already a function template // specialization for this particular set of template arguments. ArrayRef Innermost = TemplateArgs.getInnermost(); void *InsertPos = nullptr; FunctionDecl *SpecFunc = FunctionTemplate->findSpecialization(Innermost, InsertPos); // If we already have a function template specialization, return it. if (SpecFunc) return SpecFunc; } bool isFriend; if (FunctionTemplate) isFriend = (FunctionTemplate->getFriendObjectKind() != Decl::FOK_None); else isFriend = (D->getFriendObjectKind() != Decl::FOK_None); bool MergeWithParentScope = (TemplateParams != nullptr) || !(isa(Owner) && cast(Owner)->isDefinedOutsideFunctionOrMethod()); LocalInstantiationScope Scope(SemaRef, MergeWithParentScope); // Instantiate enclosing template arguments for friends. 
  SmallVector<TemplateParameterList *, 4> TempParamLists;
  unsigned NumTempParamLists = 0;
  if (isFriend && (NumTempParamLists = D->getNumTemplateParameterLists())) {
    TempParamLists.resize(NumTempParamLists);
    for (unsigned I = 0; I != NumTempParamLists; ++I) {
      TemplateParameterList *TempParams = D->getTemplateParameterList(I);
      TemplateParameterList *InstParams = SubstTemplateParams(TempParams);
      if (!InstParams)
        return nullptr;
      TempParamLists[I] = InstParams;
    }
  }

  ExplicitSpecifier InstantiatedExplicitSpecifier =
      instantiateExplicitSpecifier(SemaRef, TemplateArgs,
                                   ExplicitSpecifier::getFromDecl(D), D);
  if (InstantiatedExplicitSpecifier.isInvalid())
    return nullptr;

  // Implicit destructors/constructors created for local classes in
  // DeclareImplicit* (see SemaDeclCXX.cpp) might not have an associated TSI.
  // Unfortunately there isn't enough context in those functions to
  // conditionally populate the TSI without breaking non-template related use
  // cases. Populate TSIs prior to calling SubstFunctionType to make sure we
  // get a proper transformation.
  if (cast<CXXRecordDecl>(D->getParent())->isLambda() &&
      !D->getTypeSourceInfo() &&
      isa<CXXConstructorDecl, CXXDestructorDecl>(D)) {
    TypeSourceInfo *TSI =
        SemaRef.Context.getTrivialTypeSourceInfo(D->getType());
    D->setTypeSourceInfo(TSI);
  }

  SmallVector<ParmVarDecl *, 4> Params;
  TypeSourceInfo *TInfo = SubstFunctionType(D, Params);
  if (!TInfo)
    return nullptr;
  QualType T = adjustFunctionTypeForInstantiation(SemaRef.Context, D, TInfo);

  if (TemplateParams && TemplateParams->size()) {
    auto *LastParam =
        dyn_cast<TemplateTypeParmDecl>(TemplateParams->asArray().back());
    if (LastParam && LastParam->isImplicit() &&
        LastParam->hasTypeConstraint()) {
      // In abbreviated templates, the type-constraints of invented template
      // type parameters are instantiated with the function type, invalidating
      // the TemplateParameterList which relied on the template type parameter
      // not having a type constraint. Recreate the TemplateParameterList with
      // the updated parameter list.
TemplateParams = TemplateParameterList::Create( SemaRef.Context, TemplateParams->getTemplateLoc(), TemplateParams->getLAngleLoc(), TemplateParams->asArray(), TemplateParams->getRAngleLoc(), TemplateParams->getRequiresClause()); } } NestedNameSpecifierLoc QualifierLoc = D->getQualifierLoc(); if (QualifierLoc) { QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, TemplateArgs); if (!QualifierLoc) return nullptr; } // FIXME: Concepts: Do not substitute into constraint expressions Expr *TrailingRequiresClause = D->getTrailingRequiresClause(); if (TrailingRequiresClause) { EnterExpressionEvaluationContext ConstantEvaluated( SemaRef, Sema::ExpressionEvaluationContext::Unevaluated); auto *ThisContext = dyn_cast_or_null(Owner); Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, D->getMethodQualifiers(), ThisContext); ExprResult SubstRC = SemaRef.SubstExpr(TrailingRequiresClause, TemplateArgs); if (SubstRC.isInvalid()) return nullptr; TrailingRequiresClause = SubstRC.get(); if (!SemaRef.CheckConstraintExpression(TrailingRequiresClause)) return nullptr; } DeclContext *DC = Owner; if (isFriend) { if (QualifierLoc) { CXXScopeSpec SS; SS.Adopt(QualifierLoc); DC = SemaRef.computeDeclContext(SS); if (DC && SemaRef.RequireCompleteDeclContext(SS, DC)) return nullptr; } else { DC = SemaRef.FindInstantiatedContext(D->getLocation(), D->getDeclContext(), TemplateArgs); } if (!DC) return nullptr; } DeclarationNameInfo NameInfo = SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs); if (FunctionRewriteKind != RewriteKind::None) adjustForRewrite(FunctionRewriteKind, D, T, TInfo, NameInfo); // Build the instantiated method declaration. CXXRecordDecl *Record = cast(DC); CXXMethodDecl *Method = nullptr; SourceLocation StartLoc = D->getInnerLocStart(); if (CXXConstructorDecl *Constructor = dyn_cast(D)) { Method = CXXConstructorDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, InstantiatedExplicitSpecifier, Constructor->UsesFPIntrin(), Constructor->isInlineSpecified(), false, Constructor->getConstexprKind(), InheritedConstructor(), TrailingRequiresClause); Method->setRangeEnd(Constructor->getEndLoc()); } else if (CXXDestructorDecl *Destructor = dyn_cast(D)) { Method = CXXDestructorDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, Destructor->UsesFPIntrin(), Destructor->isInlineSpecified(), false, Destructor->getConstexprKind(), TrailingRequiresClause); Method->setRangeEnd(Destructor->getEndLoc()); Method->setDeclName(SemaRef.Context.DeclarationNames.getCXXDestructorName( SemaRef.Context.getCanonicalType( SemaRef.Context.getTypeDeclType(Record)))); } else if (CXXConversionDecl *Conversion = dyn_cast(D)) { Method = CXXConversionDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, Conversion->UsesFPIntrin(), Conversion->isInlineSpecified(), InstantiatedExplicitSpecifier, Conversion->getConstexprKind(), Conversion->getEndLoc(), TrailingRequiresClause); } else { StorageClass SC = D->isStatic() ? SC_Static : SC_None; Method = CXXMethodDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, SC, D->UsesFPIntrin(), D->isInlineSpecified(), D->getConstexprKind(), D->getEndLoc(), TrailingRequiresClause); } if (D->isInlined()) Method->setImplicitlyInline(); if (QualifierLoc) Method->setQualifierInfo(QualifierLoc); if (TemplateParams) { // Our resulting instantiation is actually a function template, since we // are substituting only the outer template parameters. 
For example, given // // template // struct X { // template void f(T, U); // }; // // X x; // // We are instantiating the member template "f" within X, which means // substituting int for T, but leaving "f" as a member function template. // Build the function template itself. FunctionTemplate = FunctionTemplateDecl::Create(SemaRef.Context, Record, Method->getLocation(), Method->getDeclName(), TemplateParams, Method); if (isFriend) { FunctionTemplate->setLexicalDeclContext(Owner); FunctionTemplate->setObjectOfFriendDecl(); } else if (D->isOutOfLine()) FunctionTemplate->setLexicalDeclContext(D->getLexicalDeclContext()); Method->setDescribedFunctionTemplate(FunctionTemplate); } else if (FunctionTemplate) { // Record this function template specialization. ArrayRef Innermost = TemplateArgs.getInnermost(); Method->setFunctionTemplateSpecialization(FunctionTemplate, TemplateArgumentList::CreateCopy(SemaRef.Context, Innermost), /*InsertPos=*/nullptr); } else if (!isFriend) { // Record that this is an instantiation of a member function. Method->setInstantiationOfMemberFunction(D, TSK_ImplicitInstantiation); } // If we are instantiating a member function defined // out-of-line, the instantiation will have the same lexical // context (which will be a namespace scope) as the template. if (isFriend) { if (NumTempParamLists) Method->setTemplateParameterListsInfo( SemaRef.Context, llvm::makeArrayRef(TempParamLists.data(), NumTempParamLists)); Method->setLexicalDeclContext(Owner); Method->setObjectOfFriendDecl(); } else if (D->isOutOfLine()) Method->setLexicalDeclContext(D->getLexicalDeclContext()); // Attach the parameters for (unsigned P = 0; P < Params.size(); ++P) Params[P]->setOwningFunction(Method); Method->setParams(Params); if (InitMethodInstantiation(Method, D)) Method->setInvalidDecl(); LookupResult Previous(SemaRef, NameInfo, Sema::LookupOrdinaryName, Sema::ForExternalRedeclaration); bool IsExplicitSpecialization = false; // If the name of this function was written as a template-id, instantiate // the explicit template arguments. if (DependentFunctionTemplateSpecializationInfo *Info = D->getDependentSpecializationInfo()) { assert(isFriend && "non-friend has dependent specialization info?"); // Instantiate the explicit template arguments. TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(), Info->getRAngleLoc()); if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs, ExplicitArgs)) return nullptr; // Map the candidate templates to their instantiations. for (unsigned I = 0, E = Info->getNumTemplates(); I != E; ++I) { Decl *Temp = SemaRef.FindInstantiatedDecl(D->getLocation(), Info->getTemplate(I), TemplateArgs); if (!Temp) return nullptr; Previous.addDecl(cast(Temp)); } if (SemaRef.CheckFunctionTemplateSpecialization(Method, &ExplicitArgs, Previous)) Method->setInvalidDecl(); IsExplicitSpecialization = true; } else if (const ASTTemplateArgumentListInfo *Info = ClassScopeSpecializationArgs.getValueOr( D->getTemplateSpecializationArgsAsWritten())) { SemaRef.LookupQualifiedName(Previous, DC); TemplateArgumentListInfo ExplicitArgs(Info->getLAngleLoc(), Info->getRAngleLoc()); if (SemaRef.SubstTemplateArguments(Info->arguments(), TemplateArgs, ExplicitArgs)) return nullptr; if (SemaRef.CheckFunctionTemplateSpecialization(Method, &ExplicitArgs, Previous)) Method->setInvalidDecl(); IsExplicitSpecialization = true; } else if (ClassScopeSpecializationArgs) { // Class-scope explicit specialization written without explicit template // arguments. 
    SemaRef.LookupQualifiedName(Previous, DC);

    if (SemaRef.CheckFunctionTemplateSpecialization(Method, nullptr, Previous))
      Method->setInvalidDecl();

    IsExplicitSpecialization = true;
  } else if (!FunctionTemplate || TemplateParams || isFriend) {
    SemaRef.LookupQualifiedName(Previous, Record);

    // In C++, the previous declaration we find might be a tag type
    // (class or enum). In this case, the new declaration will hide the
    // tag type. Note that this does not apply if we're declaring a
    // typedef (C++ [dcl.typedef]p4).
    if (Previous.isSingleTagDecl())
      Previous.clear();
  }

  SemaRef.CheckFunctionDeclaration(nullptr, Method, Previous,
                                   IsExplicitSpecialization);

  if (D->isPure())
    SemaRef.CheckPureMethod(Method, SourceRange());

  // Propagate access. For a non-friend declaration, the access is
  // whatever we're propagating from. For a friend, it should be the
  // previous declaration we just found.
  if (isFriend && Method->getPreviousDecl())
    Method->setAccess(Method->getPreviousDecl()->getAccess());
  else
    Method->setAccess(D->getAccess());
  if (FunctionTemplate)
    FunctionTemplate->setAccess(Method->getAccess());

  SemaRef.CheckOverrideControl(Method);

  // If a function is defined as defaulted or deleted, mark it as such now.
  if (D->isExplicitlyDefaulted()) {
    if (SubstDefaultedFunction(Method, D))
      return nullptr;
  }
  if (D->isDeletedAsWritten())
    SemaRef.SetDeclDeleted(Method, Method->getLocation());

  // If this is an explicit specialization, mark the implicitly-instantiated
  // template specialization as being an explicit specialization too.
  // FIXME: Is this necessary?
  if (IsExplicitSpecialization && !isFriend)
    SemaRef.CompleteMemberSpecialization(Method, Previous);

  // If there's a function template, let our caller handle it.
  if (FunctionTemplate) {
    // do nothing

  // Don't hide a (potentially) valid declaration with an invalid one.
  } else if (Method->isInvalidDecl() && !Previous.empty()) {
    // do nothing

  // Otherwise, check access to friends and make them visible.
  } else if (isFriend) {
    // We only need to re-check access for methods which we didn't
    // manage to match during parsing.
    if (!D->getPreviousDecl())
      SemaRef.CheckFriendAccess(Method);

    Record->makeDeclVisibleInContext(Method);

  // Otherwise, add the declaration. We don't need to do this for
  // class-scope specializations because we'll have matched them with
  // the appropriate template.
} else { Owner->addDecl(Method); } // PR17480: Honor the used attribute to instantiate member function // definitions if (Method->hasAttr()) { if (const auto *A = dyn_cast(Owner)) { SourceLocation Loc; if (const MemberSpecializationInfo *MSInfo = A->getMemberSpecializationInfo()) Loc = MSInfo->getPointOfInstantiation(); else if (const auto *Spec = dyn_cast(A)) Loc = Spec->getPointOfInstantiation(); SemaRef.MarkFunctionReferenced(Loc, Method); } } return Method; } Decl *TemplateDeclInstantiator::VisitCXXConstructorDecl(CXXConstructorDecl *D) { return VisitCXXMethodDecl(D); } Decl *TemplateDeclInstantiator::VisitCXXDestructorDecl(CXXDestructorDecl *D) { return VisitCXXMethodDecl(D); } Decl *TemplateDeclInstantiator::VisitCXXConversionDecl(CXXConversionDecl *D) { return VisitCXXMethodDecl(D); } Decl *TemplateDeclInstantiator::VisitParmVarDecl(ParmVarDecl *D) { return SemaRef.SubstParmVarDecl(D, TemplateArgs, /*indexAdjustment*/ 0, None, /*ExpectParameterPack=*/ false); } Decl *TemplateDeclInstantiator::VisitTemplateTypeParmDecl( TemplateTypeParmDecl *D) { assert(D->getTypeForDecl()->isTemplateTypeParmType()); Optional NumExpanded; if (const TypeConstraint *TC = D->getTypeConstraint()) { if (D->isPackExpansion() && !D->isExpandedParameterPack()) { assert(TC->getTemplateArgsAsWritten() && "type parameter can only be an expansion when explicit arguments " "are specified"); // The template type parameter pack's type is a pack expansion of types. // Determine whether we need to expand this parameter pack into separate // types. SmallVector Unexpanded; for (auto &ArgLoc : TC->getTemplateArgsAsWritten()->arguments()) SemaRef.collectUnexpandedParameterPacks(ArgLoc, Unexpanded); // Determine whether the set of unexpanded parameter packs can and should // be expanded. bool Expand = true; bool RetainExpansion = false; if (SemaRef.CheckParameterPacksForExpansion( cast(TC->getImmediatelyDeclaredConstraint()) ->getEllipsisLoc(), SourceRange(TC->getConceptNameLoc(), TC->hasExplicitTemplateArgs() ? TC->getTemplateArgsAsWritten()->getRAngleLoc() : TC->getConceptNameInfo().getEndLoc()), Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpanded)) return nullptr; } } TemplateTypeParmDecl *Inst = TemplateTypeParmDecl::Create( SemaRef.Context, Owner, D->getBeginLoc(), D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getIndex(), D->getIdentifier(), D->wasDeclaredWithTypename(), D->isParameterPack(), D->hasTypeConstraint(), NumExpanded); Inst->setAccess(AS_public); Inst->setImplicit(D->isImplicit()); if (auto *TC = D->getTypeConstraint()) { if (!D->isImplicit()) { // Invented template parameter type constraints will be instantiated with // the corresponding auto-typed parameter as it might reference other // parameters. // TODO: Concepts: do not instantiate the constraint (delayed constraint // substitution) if (SemaRef.SubstTypeConstraint(Inst, TC, TemplateArgs)) return nullptr; } } if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) { TypeSourceInfo *InstantiatedDefaultArg = SemaRef.SubstType(D->getDefaultArgumentInfo(), TemplateArgs, D->getDefaultArgumentLoc(), D->getDeclName()); if (InstantiatedDefaultArg) Inst->setDefaultArgument(InstantiatedDefaultArg); } // Introduce this template parameter's instantiation into the instantiation // scope. 
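  // For illustration: U in
  //   template<typename T> struct S { template<typename U> void f(); };
  // has depth 1 in the pattern, but its instantiation into S<int> drops one
  // substituted level, so the new parameter has depth 0.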
SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Inst); return Inst; } Decl *TemplateDeclInstantiator::VisitNonTypeTemplateParmDecl( NonTypeTemplateParmDecl *D) { // Substitute into the type of the non-type template parameter. TypeLoc TL = D->getTypeSourceInfo()->getTypeLoc(); SmallVector ExpandedParameterPackTypesAsWritten; SmallVector ExpandedParameterPackTypes; bool IsExpandedParameterPack = false; TypeSourceInfo *DI; QualType T; bool Invalid = false; if (D->isExpandedParameterPack()) { // The non-type template parameter pack is an already-expanded pack // expansion of types. Substitute into each of the expanded types. ExpandedParameterPackTypes.reserve(D->getNumExpansionTypes()); ExpandedParameterPackTypesAsWritten.reserve(D->getNumExpansionTypes()); for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) { TypeSourceInfo *NewDI = SemaRef.SubstType(D->getExpansionTypeSourceInfo(I), TemplateArgs, D->getLocation(), D->getDeclName()); if (!NewDI) return nullptr; QualType NewT = SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation()); if (NewT.isNull()) return nullptr; ExpandedParameterPackTypesAsWritten.push_back(NewDI); ExpandedParameterPackTypes.push_back(NewT); } IsExpandedParameterPack = true; DI = D->getTypeSourceInfo(); T = DI->getType(); } else if (D->isPackExpansion()) { // The non-type template parameter pack's type is a pack expansion of types. // Determine whether we need to expand this parameter pack into separate // types. PackExpansionTypeLoc Expansion = TL.castAs(); TypeLoc Pattern = Expansion.getPatternLoc(); SmallVector Unexpanded; SemaRef.collectUnexpandedParameterPacks(Pattern, Unexpanded); // Determine whether the set of unexpanded parameter packs can and should // be expanded. bool Expand = true; bool RetainExpansion = false; Optional OrigNumExpansions = Expansion.getTypePtr()->getNumExpansions(); Optional NumExpansions = OrigNumExpansions; if (SemaRef.CheckParameterPacksForExpansion(Expansion.getEllipsisLoc(), Pattern.getSourceRange(), Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpansions)) return nullptr; if (Expand) { for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I); TypeSourceInfo *NewDI = SemaRef.SubstType(Pattern, TemplateArgs, D->getLocation(), D->getDeclName()); if (!NewDI) return nullptr; QualType NewT = SemaRef.CheckNonTypeTemplateParameterType(NewDI, D->getLocation()); if (NewT.isNull()) return nullptr; ExpandedParameterPackTypesAsWritten.push_back(NewDI); ExpandedParameterPackTypes.push_back(NewT); } // Note that we have an expanded parameter pack. The "type" of this // expanded parameter pack is the original expansion type, but callers // will end up using the expanded parameter pack types for type-checking. IsExpandedParameterPack = true; DI = D->getTypeSourceInfo(); T = DI->getType(); } else { // We cannot fully expand the pack expansion now, so substitute into the // pattern and create a new pack expansion type. Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1); TypeSourceInfo *NewPattern = SemaRef.SubstType(Pattern, TemplateArgs, D->getLocation(), D->getDeclName()); if (!NewPattern) return nullptr; SemaRef.CheckNonTypeTemplateParameterType(NewPattern, D->getLocation()); DI = SemaRef.CheckPackExpansion(NewPattern, Expansion.getEllipsisLoc(), NumExpansions); if (!DI) return nullptr; T = DI->getType(); } } else { // Simple case: substitution into a parameter that is not a parameter pack. 
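  // Illustrative sketch (hypothetical code) of a non-type template parameter
  // pack whose type is a pack expansion, handled by the branches above:
  //
  //   template<typename... Ts>
  //   struct X {
  //     template<Ts... Vs> struct Y;  // one value parameter per element of Ts
  //   };
  //
  //   X<int, char>::Y<1, 'a'> y;      // Vs becomes the expanded pack {int, char}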
DI = SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs, D->getLocation(), D->getDeclName()); if (!DI) return nullptr; // Check that this type is acceptable for a non-type template parameter. T = SemaRef.CheckNonTypeTemplateParameterType(DI, D->getLocation()); if (T.isNull()) { T = SemaRef.Context.IntTy; Invalid = true; } } NonTypeTemplateParmDecl *Param; if (IsExpandedParameterPack) Param = NonTypeTemplateParmDecl::Create( SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getPosition(), D->getIdentifier(), T, DI, ExpandedParameterPackTypes, ExpandedParameterPackTypesAsWritten); else Param = NonTypeTemplateParmDecl::Create( SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getPosition(), D->getIdentifier(), T, D->isParameterPack(), DI); if (AutoTypeLoc AutoLoc = DI->getTypeLoc().getContainedAutoTypeLoc()) if (AutoLoc.isConstrained()) if (SemaRef.AttachTypeConstraint( AutoLoc, Param, IsExpandedParameterPack ? DI->getTypeLoc().getAs() .getEllipsisLoc() : SourceLocation())) Invalid = true; Param->setAccess(AS_public); Param->setImplicit(D->isImplicit()); if (Invalid) Param->setInvalidDecl(); if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) { EnterExpressionEvaluationContext ConstantEvaluated( SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); ExprResult Value = SemaRef.SubstExpr(D->getDefaultArgument(), TemplateArgs); if (!Value.isInvalid()) Param->setDefaultArgument(Value.get()); } // Introduce this template parameter's instantiation into the instantiation // scope. SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Param); return Param; } static void collectUnexpandedParameterPacks( Sema &S, TemplateParameterList *Params, SmallVectorImpl &Unexpanded) { for (const auto &P : *Params) { if (P->isTemplateParameterPack()) continue; if (NonTypeTemplateParmDecl *NTTP = dyn_cast(P)) S.collectUnexpandedParameterPacks(NTTP->getTypeSourceInfo()->getTypeLoc(), Unexpanded); if (TemplateTemplateParmDecl *TTP = dyn_cast(P)) collectUnexpandedParameterPacks(S, TTP->getTemplateParameters(), Unexpanded); } } Decl * TemplateDeclInstantiator::VisitTemplateTemplateParmDecl( TemplateTemplateParmDecl *D) { // Instantiate the template parameter list of the template template parameter. TemplateParameterList *TempParams = D->getTemplateParameters(); TemplateParameterList *InstParams; SmallVector ExpandedParams; bool IsExpandedParameterPack = false; if (D->isExpandedParameterPack()) { // The template template parameter pack is an already-expanded pack // expansion of template parameters. Substitute into each of the expanded // parameters. ExpandedParams.reserve(D->getNumExpansionTemplateParameters()); for (unsigned I = 0, N = D->getNumExpansionTemplateParameters(); I != N; ++I) { LocalInstantiationScope Scope(SemaRef); TemplateParameterList *Expansion = SubstTemplateParams(D->getExpansionTemplateParameters(I)); if (!Expansion) return nullptr; ExpandedParams.push_back(Expansion); } IsExpandedParameterPack = true; InstParams = TempParams; } else if (D->isPackExpansion()) { // The template template parameter pack expands to a pack of template // template parameters. Determine whether we need to expand this parameter // pack into separate parameters. 
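  // Illustrative sketch (hypothetical code): a template template parameter
  // pack whose own parameter list mentions an enclosing pack, e.g.,
  //
  //   template<typename... Ts>
  //   struct A {
  //     template<template<Ts> class... TTs>  // after expansion, TTs[i] takes a
  //     struct B;                            // non-type parameter of type Ts[i]
  //   };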
SmallVector Unexpanded; collectUnexpandedParameterPacks(SemaRef, D->getTemplateParameters(), Unexpanded); // Determine whether the set of unexpanded parameter packs can and should // be expanded. bool Expand = true; bool RetainExpansion = false; Optional NumExpansions; if (SemaRef.CheckParameterPacksForExpansion(D->getLocation(), TempParams->getSourceRange(), Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpansions)) return nullptr; if (Expand) { for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I); LocalInstantiationScope Scope(SemaRef); TemplateParameterList *Expansion = SubstTemplateParams(TempParams); if (!Expansion) return nullptr; ExpandedParams.push_back(Expansion); } // Note that we have an expanded parameter pack. The "type" of this // expanded parameter pack is the original expansion type, but callers // will end up using the expanded parameter pack types for type-checking. IsExpandedParameterPack = true; InstParams = TempParams; } else { // We cannot fully expand the pack expansion now, so just substitute // into the pattern. Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1); LocalInstantiationScope Scope(SemaRef); InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; } } else { // Perform the actual substitution of template parameters within a new, // local instantiation scope. LocalInstantiationScope Scope(SemaRef); InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; } // Build the template template parameter. TemplateTemplateParmDecl *Param; if (IsExpandedParameterPack) Param = TemplateTemplateParmDecl::Create( SemaRef.Context, Owner, D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getPosition(), D->getIdentifier(), InstParams, ExpandedParams); else Param = TemplateTemplateParmDecl::Create( SemaRef.Context, Owner, D->getLocation(), D->getDepth() - TemplateArgs.getNumSubstitutedLevels(), D->getPosition(), D->isParameterPack(), D->getIdentifier(), InstParams); if (D->hasDefaultArgument() && !D->defaultArgumentWasInherited()) { NestedNameSpecifierLoc QualifierLoc = D->getDefaultArgument().getTemplateQualifierLoc(); QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(QualifierLoc, TemplateArgs); TemplateName TName = SemaRef.SubstTemplateName( QualifierLoc, D->getDefaultArgument().getArgument().getAsTemplate(), D->getDefaultArgument().getTemplateNameLoc(), TemplateArgs); if (!TName.isNull()) Param->setDefaultArgument( SemaRef.Context, TemplateArgumentLoc(SemaRef.Context, TemplateArgument(TName), D->getDefaultArgument().getTemplateQualifierLoc(), D->getDefaultArgument().getTemplateNameLoc())); } Param->setAccess(AS_public); Param->setImplicit(D->isImplicit()); // Introduce this template parameter's instantiation into the instantiation // scope. SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, Param); return Param; } Decl *TemplateDeclInstantiator::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) { // Using directives are never dependent (and never contain any types or // expressions), so they require no explicit instantiation work. UsingDirectiveDecl *Inst = UsingDirectiveDecl::Create(SemaRef.Context, Owner, D->getLocation(), D->getNamespaceKeyLocation(), D->getQualifierLoc(), D->getIdentLocation(), D->getNominatedNamespace(), D->getCommonAncestor()); // Add the using directive to its declaration context // only if this is not a function or method. 
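  // Illustrative sketch (hypothetical code): a using-directive in a function
  // template is recreated as-is and deliberately not added to the function's
  // DeclContext:
  //
  //   namespace N { int x; }
  //   template<typename T>
  //   void f() {
  //     using namespace N;
  //   }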
  if (!Owner->isFunctionOrMethod())
    Owner->addDecl(Inst);

  return Inst;
}

Decl *TemplateDeclInstantiator::VisitBaseUsingDecls(BaseUsingDecl *D,
                                                    BaseUsingDecl *Inst,
                                                    LookupResult *Lookup) {

  bool isFunctionScope = Owner->isFunctionOrMethod();

  for (auto *Shadow : D->shadows()) {
    // FIXME: UsingShadowDecl doesn't preserve its immediate target, so
    // reconstruct it in the case where it matters. Hm, can we extract it from
    // the DeclSpec when parsing and save it in the UsingDecl itself?
    NamedDecl *OldTarget = Shadow->getTargetDecl();
    if (auto *CUSD = dyn_cast<ConstructorUsingShadowDecl>(Shadow))
      if (auto *BaseShadow = CUSD->getNominatedBaseClassShadowDecl())
        OldTarget = BaseShadow;

    NamedDecl *InstTarget = nullptr;
    if (auto *EmptyD =
            dyn_cast<UnresolvedUsingIfExistsDecl>(Shadow->getTargetDecl())) {
      InstTarget = UnresolvedUsingIfExistsDecl::Create(
          SemaRef.Context, Owner, EmptyD->getLocation(), EmptyD->getDeclName());
    } else {
      InstTarget = cast_or_null<NamedDecl>(SemaRef.FindInstantiatedDecl(
          Shadow->getLocation(), OldTarget, TemplateArgs));
    }
    if (!InstTarget)
      return nullptr;

    UsingShadowDecl *PrevDecl = nullptr;
    if (Lookup &&
        SemaRef.CheckUsingShadowDecl(Inst, InstTarget, *Lookup, PrevDecl))
      continue;

    if (UsingShadowDecl *OldPrev = getPreviousDeclForInstantiation(Shadow))
      PrevDecl = cast_or_null<UsingShadowDecl>(SemaRef.FindInstantiatedDecl(
          Shadow->getLocation(), OldPrev, TemplateArgs));

    UsingShadowDecl *InstShadow = SemaRef.BuildUsingShadowDecl(
        /*Scope*/ nullptr, Inst, InstTarget, PrevDecl);
    SemaRef.Context.setInstantiatedFromUsingShadowDecl(InstShadow, Shadow);

    if (isFunctionScope)
      SemaRef.CurrentInstantiationScope->InstantiatedLocal(Shadow, InstShadow);
  }

  return Inst;
}

Decl *TemplateDeclInstantiator::VisitUsingDecl(UsingDecl *D) {
  // The nested name specifier may be dependent, for example
  //     template <class T> struct t {
  //       struct s1 { T f1(); };
  //       struct s2 : s1 { using s1::f1; };
  //     };
  //     template struct t<int>;
  // Here, in using s1::f1, s1 refers to t<T>::s1;
  // we need to substitute for t<int>::s1.
  NestedNameSpecifierLoc QualifierLoc =
      SemaRef.SubstNestedNameSpecifierLoc(D->getQualifierLoc(), TemplateArgs);
  if (!QualifierLoc)
    return nullptr;

  // For an inheriting constructor declaration, the name of the using
  // declaration is the name of a constructor in this class, not in the
  // base class.
  DeclarationNameInfo NameInfo = D->getNameInfo();
  if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName)
    if (auto *RD = dyn_cast<CXXRecordDecl>(SemaRef.CurContext))
      NameInfo.setName(SemaRef.Context.DeclarationNames.getCXXConstructorName(
          SemaRef.Context.getCanonicalType(SemaRef.Context.getRecordType(RD))));

  // We only need to do redeclaration lookups if we're in a class scope (in
  // fact, it's not really even possible in non-class scopes).
  bool CheckRedeclaration = Owner->isRecord();
  LookupResult Prev(SemaRef, NameInfo, Sema::LookupUsingDeclName,
                    Sema::ForVisibleRedeclaration);

  UsingDecl *NewUD = UsingDecl::Create(SemaRef.Context, Owner,
                                       D->getUsingLoc(),
                                       QualifierLoc, NameInfo,
                                       D->hasTypename());

  CXXScopeSpec SS;
  SS.Adopt(QualifierLoc);
  if (CheckRedeclaration) {
    Prev.setHideTags(false);
    SemaRef.LookupQualifiedName(Prev, Owner);

    // Check for invalid redeclarations.
if (SemaRef.CheckUsingDeclRedeclaration(D->getUsingLoc(), D->hasTypename(), SS, D->getLocation(), Prev)) NewUD->setInvalidDecl(); } if (!NewUD->isInvalidDecl() && SemaRef.CheckUsingDeclQualifier(D->getUsingLoc(), D->hasTypename(), SS, NameInfo, D->getLocation(), nullptr, D)) NewUD->setInvalidDecl(); SemaRef.Context.setInstantiatedFromUsingDecl(NewUD, D); NewUD->setAccess(D->getAccess()); Owner->addDecl(NewUD); // Don't process the shadow decls for an invalid decl. if (NewUD->isInvalidDecl()) return NewUD; // If the using scope was dependent, or we had dependent bases, we need to // recheck the inheritance if (NameInfo.getName().getNameKind() == DeclarationName::CXXConstructorName) SemaRef.CheckInheritingConstructorUsingDecl(NewUD); return VisitBaseUsingDecls(D, NewUD, CheckRedeclaration ? &Prev : nullptr); } Decl *TemplateDeclInstantiator::VisitUsingEnumDecl(UsingEnumDecl *D) { // Cannot be a dependent type, but still could be an instantiation EnumDecl *EnumD = cast_or_null(SemaRef.FindInstantiatedDecl( D->getLocation(), D->getEnumDecl(), TemplateArgs)); if (SemaRef.RequireCompleteEnumDecl(EnumD, EnumD->getLocation())) return nullptr; UsingEnumDecl *NewUD = UsingEnumDecl::Create(SemaRef.Context, Owner, D->getUsingLoc(), D->getEnumLoc(), D->getLocation(), EnumD); SemaRef.Context.setInstantiatedFromUsingEnumDecl(NewUD, D); NewUD->setAccess(D->getAccess()); Owner->addDecl(NewUD); // Don't process the shadow decls for an invalid decl. if (NewUD->isInvalidDecl()) return NewUD; // We don't have to recheck for duplication of the UsingEnumDecl itself, as it // cannot be dependent, and will therefore have been checked during template // definition. return VisitBaseUsingDecls(D, NewUD, nullptr); } Decl *TemplateDeclInstantiator::VisitUsingShadowDecl(UsingShadowDecl *D) { // Ignore these; we handle them in bulk when processing the UsingDecl. return nullptr; } Decl *TemplateDeclInstantiator::VisitConstructorUsingShadowDecl( ConstructorUsingShadowDecl *D) { // Ignore these; we handle them in bulk when processing the UsingDecl. return nullptr; } template Decl *TemplateDeclInstantiator::instantiateUnresolvedUsingDecl( T *D, bool InstantiatingPackElement) { // If this is a pack expansion, expand it now. if (D->isPackExpansion() && !InstantiatingPackElement) { SmallVector Unexpanded; SemaRef.collectUnexpandedParameterPacks(D->getQualifierLoc(), Unexpanded); SemaRef.collectUnexpandedParameterPacks(D->getNameInfo(), Unexpanded); // Determine whether the set of unexpanded parameter packs can and should // be expanded. bool Expand = true; bool RetainExpansion = false; Optional NumExpansions; if (SemaRef.CheckParameterPacksForExpansion( D->getEllipsisLoc(), D->getSourceRange(), Unexpanded, TemplateArgs, Expand, RetainExpansion, NumExpansions)) return nullptr; // This declaration cannot appear within a function template signature, // so we can't have a partial argument list for a parameter pack. assert(!RetainExpansion && "should never need to retain an expansion for UsingPackDecl"); if (!Expand) { // We cannot fully expand the pack expansion now, so substitute into the // pattern and create a new pack expansion. Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, -1); return instantiateUnresolvedUsingDecl(D, true); } // Within a function, we don't have any normal way to check for conflicts // between shadow declarations from different using declarations in the // same pack expansion, but this is always ill-formed because all expansions // must produce (conflicting) enumerators. 
// // Sadly we can't just reject this in the template definition because it // could be valid if the pack is empty or has exactly one expansion. if (D->getDeclContext()->isFunctionOrMethod() && *NumExpansions > 1) { SemaRef.Diag(D->getEllipsisLoc(), diag::err_using_decl_redeclaration_expansion); return nullptr; } // Instantiate the slices of this pack and build a UsingPackDecl. SmallVector Expansions; for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, I); Decl *Slice = instantiateUnresolvedUsingDecl(D, true); if (!Slice) return nullptr; // Note that we can still get unresolved using declarations here, if we // had arguments for all packs but the pattern also contained other // template arguments (this only happens during partial substitution, eg // into the body of a generic lambda in a function template). Expansions.push_back(cast(Slice)); } auto *NewD = SemaRef.BuildUsingPackDecl(D, Expansions); if (isDeclWithinFunction(D)) SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewD); return NewD; } UnresolvedUsingTypenameDecl *TD = dyn_cast(D); SourceLocation TypenameLoc = TD ? TD->getTypenameLoc() : SourceLocation(); NestedNameSpecifierLoc QualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(D->getQualifierLoc(), TemplateArgs); if (!QualifierLoc) return nullptr; CXXScopeSpec SS; SS.Adopt(QualifierLoc); DeclarationNameInfo NameInfo = SemaRef.SubstDeclarationNameInfo(D->getNameInfo(), TemplateArgs); // Produce a pack expansion only if we're not instantiating a particular // slice of a pack expansion. bool InstantiatingSlice = D->getEllipsisLoc().isValid() && SemaRef.ArgumentPackSubstitutionIndex != -1; SourceLocation EllipsisLoc = InstantiatingSlice ? SourceLocation() : D->getEllipsisLoc(); bool IsUsingIfExists = D->template hasAttr(); NamedDecl *UD = SemaRef.BuildUsingDeclaration( /*Scope*/ nullptr, D->getAccess(), D->getUsingLoc(), /*HasTypename*/ TD, TypenameLoc, SS, NameInfo, EllipsisLoc, ParsedAttributesView(), /*IsInstantiation*/ true, IsUsingIfExists); if (UD) { SemaRef.InstantiateAttrs(TemplateArgs, D, UD); SemaRef.Context.setInstantiatedFromUsingDecl(UD, D); } return UD; } Decl *TemplateDeclInstantiator::VisitUnresolvedUsingTypenameDecl( UnresolvedUsingTypenameDecl *D) { return instantiateUnresolvedUsingDecl(D); } Decl *TemplateDeclInstantiator::VisitUnresolvedUsingValueDecl( UnresolvedUsingValueDecl *D) { return instantiateUnresolvedUsingDecl(D); } Decl *TemplateDeclInstantiator::VisitUnresolvedUsingIfExistsDecl( UnresolvedUsingIfExistsDecl *D) { llvm_unreachable("referring to unresolved decl out of UsingShadowDecl"); } Decl *TemplateDeclInstantiator::VisitUsingPackDecl(UsingPackDecl *D) { SmallVector Expansions; for (auto *UD : D->expansions()) { if (NamedDecl *NewUD = SemaRef.FindInstantiatedDecl(D->getLocation(), UD, TemplateArgs)) Expansions.push_back(NewUD); else return nullptr; } auto *NewD = SemaRef.BuildUsingPackDecl(D, Expansions); if (isDeclWithinFunction(D)) SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewD); return NewD; } Decl *TemplateDeclInstantiator::VisitClassScopeFunctionSpecializationDecl( ClassScopeFunctionSpecializationDecl *Decl) { CXXMethodDecl *OldFD = Decl->getSpecialization(); return cast_or_null( VisitCXXMethodDecl(OldFD, nullptr, Decl->getTemplateArgsAsWritten())); } Decl *TemplateDeclInstantiator::VisitOMPThreadPrivateDecl( OMPThreadPrivateDecl *D) { SmallVector Vars; for (auto *I : D->varlists()) { Expr *Var = SemaRef.SubstExpr(I, TemplateArgs).get(); assert(isa(Var) && 
"threadprivate arg is not a DeclRefExpr"); Vars.push_back(Var); } OMPThreadPrivateDecl *TD = SemaRef.CheckOMPThreadPrivateDecl(D->getLocation(), Vars); TD->setAccess(AS_public); Owner->addDecl(TD); return TD; } Decl *TemplateDeclInstantiator::VisitOMPAllocateDecl(OMPAllocateDecl *D) { SmallVector Vars; for (auto *I : D->varlists()) { Expr *Var = SemaRef.SubstExpr(I, TemplateArgs).get(); assert(isa(Var) && "allocate arg is not a DeclRefExpr"); Vars.push_back(Var); } SmallVector Clauses; // Copy map clauses from the original mapper. for (OMPClause *C : D->clauselists()) { OMPClause *IC = nullptr; if (auto *AC = dyn_cast(C)) { ExprResult NewE = SemaRef.SubstExpr(AC->getAllocator(), TemplateArgs); if (!NewE.isUsable()) continue; IC = SemaRef.ActOnOpenMPAllocatorClause( NewE.get(), AC->getBeginLoc(), AC->getLParenLoc(), AC->getEndLoc()); } else if (auto *AC = dyn_cast(C)) { ExprResult NewE = SemaRef.SubstExpr(AC->getAlignment(), TemplateArgs); if (!NewE.isUsable()) continue; IC = SemaRef.ActOnOpenMPAlignClause(NewE.get(), AC->getBeginLoc(), AC->getLParenLoc(), AC->getEndLoc()); // If align clause value ends up being invalid, this can end up null. if (!IC) continue; } Clauses.push_back(IC); } Sema::DeclGroupPtrTy Res = SemaRef.ActOnOpenMPAllocateDirective( D->getLocation(), Vars, Clauses, Owner); if (Res.get().isNull()) return nullptr; return Res.get().getSingleDecl(); } Decl *TemplateDeclInstantiator::VisitOMPRequiresDecl(OMPRequiresDecl *D) { llvm_unreachable( "Requires directive cannot be instantiated within a dependent context"); } Decl *TemplateDeclInstantiator::VisitOMPDeclareReductionDecl( OMPDeclareReductionDecl *D) { // Instantiate type and check if it is allowed. const bool RequiresInstantiation = D->getType()->isDependentType() || D->getType()->isInstantiationDependentType() || D->getType()->containsUnexpandedParameterPack(); QualType SubstReductionType; if (RequiresInstantiation) { SubstReductionType = SemaRef.ActOnOpenMPDeclareReductionType( D->getLocation(), ParsedType::make(SemaRef.SubstType( D->getType(), TemplateArgs, D->getLocation(), DeclarationName()))); } else { SubstReductionType = D->getType(); } if (SubstReductionType.isNull()) return nullptr; Expr *Combiner = D->getCombiner(); Expr *Init = D->getInitializer(); bool IsCorrect = true; // Create instantiated copy. std::pair ReductionTypes[] = { std::make_pair(SubstReductionType, D->getLocation())}; auto *PrevDeclInScope = D->getPrevDeclInScope(); if (PrevDeclInScope && !PrevDeclInScope->isInvalidDecl()) { PrevDeclInScope = cast( SemaRef.CurrentInstantiationScope->findInstantiationOf(PrevDeclInScope) ->get()); } auto DRD = SemaRef.ActOnOpenMPDeclareReductionDirectiveStart( /*S=*/nullptr, Owner, D->getDeclName(), ReductionTypes, D->getAccess(), PrevDeclInScope); auto *NewDRD = cast(DRD.get().getSingleDecl()); SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewDRD); Expr *SubstCombiner = nullptr; Expr *SubstInitializer = nullptr; // Combiners instantiation sequence. 
if (Combiner) { SemaRef.ActOnOpenMPDeclareReductionCombinerStart( /*S=*/nullptr, NewDRD); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getCombinerIn())->getDecl(), cast(NewDRD->getCombinerIn())->getDecl()); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getCombinerOut())->getDecl(), cast(NewDRD->getCombinerOut())->getDecl()); auto *ThisContext = dyn_cast_or_null(Owner); Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(), ThisContext); SubstCombiner = SemaRef.SubstExpr(Combiner, TemplateArgs).get(); SemaRef.ActOnOpenMPDeclareReductionCombinerEnd(NewDRD, SubstCombiner); } // Initializers instantiation sequence. if (Init) { VarDecl *OmpPrivParm = SemaRef.ActOnOpenMPDeclareReductionInitializerStart( /*S=*/nullptr, NewDRD); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getInitOrig())->getDecl(), cast(NewDRD->getInitOrig())->getDecl()); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getInitPriv())->getDecl(), cast(NewDRD->getInitPriv())->getDecl()); if (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit) { SubstInitializer = SemaRef.SubstExpr(Init, TemplateArgs).get(); } else { auto *OldPrivParm = cast(cast(D->getInitPriv())->getDecl()); IsCorrect = IsCorrect && OldPrivParm->hasInit(); if (IsCorrect) SemaRef.InstantiateVariableInitializer(OmpPrivParm, OldPrivParm, TemplateArgs); } SemaRef.ActOnOpenMPDeclareReductionInitializerEnd(NewDRD, SubstInitializer, OmpPrivParm); } IsCorrect = IsCorrect && SubstCombiner && (!Init || (D->getInitializerKind() == OMPDeclareReductionDecl::CallInit && SubstInitializer) || (D->getInitializerKind() != OMPDeclareReductionDecl::CallInit && !SubstInitializer)); (void)SemaRef.ActOnOpenMPDeclareReductionDirectiveEnd( /*S=*/nullptr, DRD, IsCorrect && !D->isInvalidDecl()); return NewDRD; } Decl * TemplateDeclInstantiator::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) { // Instantiate type and check if it is allowed. const bool RequiresInstantiation = D->getType()->isDependentType() || D->getType()->isInstantiationDependentType() || D->getType()->containsUnexpandedParameterPack(); QualType SubstMapperTy; DeclarationName VN = D->getVarName(); if (RequiresInstantiation) { SubstMapperTy = SemaRef.ActOnOpenMPDeclareMapperType( D->getLocation(), ParsedType::make(SemaRef.SubstType(D->getType(), TemplateArgs, D->getLocation(), VN))); } else { SubstMapperTy = D->getType(); } if (SubstMapperTy.isNull()) return nullptr; // Create an instantiated copy of mapper. auto *PrevDeclInScope = D->getPrevDeclInScope(); if (PrevDeclInScope && !PrevDeclInScope->isInvalidDecl()) { PrevDeclInScope = cast( SemaRef.CurrentInstantiationScope->findInstantiationOf(PrevDeclInScope) ->get()); } bool IsCorrect = true; SmallVector Clauses; // Instantiate the mapper variable. DeclarationNameInfo DirName; SemaRef.StartOpenMPDSABlock(llvm::omp::OMPD_declare_mapper, DirName, /*S=*/nullptr, (*D->clauselist_begin())->getBeginLoc()); ExprResult MapperVarRef = SemaRef.ActOnOpenMPDeclareMapperDirectiveVarDecl( /*S=*/nullptr, SubstMapperTy, D->getLocation(), VN); SemaRef.CurrentInstantiationScope->InstantiatedLocal( cast(D->getMapperVarRef())->getDecl(), cast(MapperVarRef.get())->getDecl()); auto *ThisContext = dyn_cast_or_null(Owner); Sema::CXXThisScopeRAII ThisScope(SemaRef, ThisContext, Qualifiers(), ThisContext); // Instantiate map clauses. 
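  // Illustrative sketch (hypothetical code, assuming this declare-mapper
  // form) of a dependent mapper whose map clauses are substituted below:
  //
  //   template<typename T> struct pt { T x; T y; };
  //   template<typename T>
  //   void run() {
  //   #pragma omp declare mapper(id : pt<T> p) map(p.x, p.y)
  //   }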
for (OMPClause *C : D->clauselists()) { auto *OldC = cast(C); SmallVector NewVars; for (Expr *OE : OldC->varlists()) { Expr *NE = SemaRef.SubstExpr(OE, TemplateArgs).get(); if (!NE) { IsCorrect = false; break; } NewVars.push_back(NE); } if (!IsCorrect) break; NestedNameSpecifierLoc NewQualifierLoc = SemaRef.SubstNestedNameSpecifierLoc(OldC->getMapperQualifierLoc(), TemplateArgs); CXXScopeSpec SS; SS.Adopt(NewQualifierLoc); DeclarationNameInfo NewNameInfo = SemaRef.SubstDeclarationNameInfo(OldC->getMapperIdInfo(), TemplateArgs); OMPVarListLocTy Locs(OldC->getBeginLoc(), OldC->getLParenLoc(), OldC->getEndLoc()); OMPClause *NewC = SemaRef.ActOnOpenMPMapClause( OldC->getMapTypeModifiers(), OldC->getMapTypeModifiersLoc(), SS, NewNameInfo, OldC->getMapType(), OldC->isImplicitMapType(), OldC->getMapLoc(), OldC->getColonLoc(), NewVars, Locs); Clauses.push_back(NewC); } SemaRef.EndOpenMPDSABlock(nullptr); if (!IsCorrect) return nullptr; Sema::DeclGroupPtrTy DG = SemaRef.ActOnOpenMPDeclareMapperDirective( /*S=*/nullptr, Owner, D->getDeclName(), SubstMapperTy, D->getLocation(), VN, D->getAccess(), MapperVarRef.get(), Clauses, PrevDeclInScope); Decl *NewDMD = DG.get().getSingleDecl(); SemaRef.CurrentInstantiationScope->InstantiatedLocal(D, NewDMD); return NewDMD; } Decl *TemplateDeclInstantiator::VisitOMPCapturedExprDecl( OMPCapturedExprDecl * /*D*/) { llvm_unreachable("Should not be met in templates"); } Decl *TemplateDeclInstantiator::VisitFunctionDecl(FunctionDecl *D) { return VisitFunctionDecl(D, nullptr); } Decl * TemplateDeclInstantiator::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) { Decl *Inst = VisitFunctionDecl(D, nullptr); if (Inst && !D->getDescribedFunctionTemplate()) Owner->addDecl(Inst); return Inst; } Decl *TemplateDeclInstantiator::VisitCXXMethodDecl(CXXMethodDecl *D) { return VisitCXXMethodDecl(D, nullptr); } Decl *TemplateDeclInstantiator::VisitRecordDecl(RecordDecl *D) { llvm_unreachable("There are only CXXRecordDecls in C++"); } Decl * TemplateDeclInstantiator::VisitClassTemplateSpecializationDecl( ClassTemplateSpecializationDecl *D) { // As a MS extension, we permit class-scope explicit specialization // of member class templates. ClassTemplateDecl *ClassTemplate = D->getSpecializedTemplate(); assert(ClassTemplate->getDeclContext()->isRecord() && D->getTemplateSpecializationKind() == TSK_ExplicitSpecialization && "can only instantiate an explicit specialization " "for a member class template"); // Lookup the already-instantiated declaration in the instantiation // of the class template. ClassTemplateDecl *InstClassTemplate = cast_or_null(SemaRef.FindInstantiatedDecl( D->getLocation(), ClassTemplate, TemplateArgs)); if (!InstClassTemplate) return nullptr; // Substitute into the template arguments of the class template explicit // specialization. TemplateSpecializationTypeLoc Loc = D->getTypeAsWritten()->getTypeLoc(). castAs(); TemplateArgumentListInfo InstTemplateArgs(Loc.getLAngleLoc(), Loc.getRAngleLoc()); SmallVector ArgLocs; for (unsigned I = 0; I != Loc.getNumArgs(); ++I) ArgLocs.push_back(Loc.getArgLoc(I)); if (SemaRef.SubstTemplateArguments(ArgLocs, TemplateArgs, InstTemplateArgs)) return nullptr; // Check that the template argument list is well-formed for this // class template. 
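  // Illustrative sketch (hypothetical code, accepted under -fms-extensions)
  // of the class-scope explicit specialization this visitor instantiates:
  //
  //   template<typename T>
  //   struct A {
  //     template<typename U> struct B;
  //     template<> struct B<int> {};   // class-scope explicit specialization
  //   };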
  SmallVector<TemplateArgument, 4> Converted;
  if (SemaRef.CheckTemplateArgumentList(InstClassTemplate, D->getLocation(),
                                        InstTemplateArgs, false, Converted,
                                        /*UpdateArgsWithConversions=*/true))
    return nullptr;

  // Figure out where to insert this class template explicit specialization
  // in the member template's set of class template explicit specializations.
  void *InsertPos = nullptr;
  ClassTemplateSpecializationDecl *PrevDecl =
      InstClassTemplate->findSpecialization(Converted, InsertPos);

  // Check whether we've already seen a conflicting instantiation of this
  // declaration (for instance, if there was a prior implicit instantiation).
  bool Ignored;
  if (PrevDecl &&
      SemaRef.CheckSpecializationInstantiationRedecl(
          D->getLocation(), D->getSpecializationKind(), PrevDecl,
          PrevDecl->getSpecializationKind(),
          PrevDecl->getPointOfInstantiation(), Ignored))
    return nullptr;

  // If PrevDecl was a definition and D is also a definition, diagnose.
  // This happens in cases like:
  //
  //   template<typename T, typename U>
  //   struct Outer {
  //     template<typename X> struct Inner;
  //     template<> struct Inner<T> {};
  //     template<> struct Inner<U> {};
  //   };
  //
  //   Outer<int, int> outer; // error: the explicit specializations of Inner
  //                          // have the same signature.
  if (PrevDecl && PrevDecl->getDefinition() &&
      D->isThisDeclarationADefinition()) {
    SemaRef.Diag(D->getLocation(), diag::err_redefinition) << PrevDecl;
    SemaRef.Diag(PrevDecl->getDefinition()->getLocation(),
                 diag::note_previous_definition);
    return nullptr;
  }

  // Create the class template explicit specialization declaration.
  ClassTemplateSpecializationDecl *InstD =
      ClassTemplateSpecializationDecl::Create(
          SemaRef.Context, D->getTagKind(), Owner, D->getBeginLoc(),
          D->getLocation(), InstClassTemplate, Converted, PrevDecl);

  // Add this explicit specialization to the set of class template
  // specializations.
  if (!PrevDecl)
    InstClassTemplate->AddSpecialization(InstD, InsertPos);

  // Substitute the nested name specifier, if any.
  if (SubstQualifier(D, InstD))
    return nullptr;

  // Build the canonical type that describes the converted template
  // arguments of the class template explicit specialization.
  QualType CanonType = SemaRef.Context.getTemplateSpecializationType(
      TemplateName(InstClassTemplate), Converted,
      SemaRef.Context.getRecordType(InstD));

  // Build the fully-sugared type for this class template
  // specialization as the user wrote in the specialization
  // itself. This means that we'll pretty-print the type retrieved
  // from the specialization's declaration the way that the user
  // actually wrote the specialization, rather than formatting the
  // name based on the "canonical" representation used to store the
  // template arguments in the specialization.
  TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo(
      TemplateName(InstClassTemplate), D->getLocation(), InstTemplateArgs,
      CanonType);

  InstD->setAccess(D->getAccess());
  InstD->setInstantiationOfMemberClass(D, TSK_ImplicitInstantiation);
  InstD->setSpecializationKind(D->getSpecializationKind());
  InstD->setTypeAsWritten(WrittenTy);
  InstD->setExternLoc(D->getExternLoc());
  InstD->setTemplateKeywordLoc(D->getTemplateKeywordLoc());

  Owner->addDecl(InstD);

  // Instantiate the members of the class-scope explicit specialization eagerly.
  // We don't have support for lazy instantiation of an explicit specialization
  // yet, and MSVC eagerly instantiates in this case.
  // FIXME: This is wrong in standard C++.
if (D->isThisDeclarationADefinition() && SemaRef.InstantiateClass(D->getLocation(), InstD, D, TemplateArgs, TSK_ImplicitInstantiation, /*Complain=*/true)) return nullptr; return InstD; } Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl( VarTemplateSpecializationDecl *D) { TemplateArgumentListInfo VarTemplateArgsInfo; VarTemplateDecl *VarTemplate = D->getSpecializedTemplate(); assert(VarTemplate && "A template specialization without specialized template?"); VarTemplateDecl *InstVarTemplate = cast_or_null(SemaRef.FindInstantiatedDecl( D->getLocation(), VarTemplate, TemplateArgs)); if (!InstVarTemplate) return nullptr; // Substitute the current template arguments. const TemplateArgumentListInfo &TemplateArgsInfo = D->getTemplateArgsInfo(); VarTemplateArgsInfo.setLAngleLoc(TemplateArgsInfo.getLAngleLoc()); VarTemplateArgsInfo.setRAngleLoc(TemplateArgsInfo.getRAngleLoc()); if (SemaRef.SubstTemplateArguments(TemplateArgsInfo.arguments(), TemplateArgs, VarTemplateArgsInfo)) return nullptr; // Check that the template argument list is well-formed for this template. SmallVector Converted; if (SemaRef.CheckTemplateArgumentList(InstVarTemplate, D->getLocation(), VarTemplateArgsInfo, false, Converted, /*UpdateArgsWithConversions=*/true)) return nullptr; // Check whether we've already seen a declaration of this specialization. void *InsertPos = nullptr; VarTemplateSpecializationDecl *PrevDecl = InstVarTemplate->findSpecialization(Converted, InsertPos); // Check whether we've already seen a conflicting instantiation of this // declaration (for instance, if there was a prior implicit instantiation). bool Ignored; if (PrevDecl && SemaRef.CheckSpecializationInstantiationRedecl( D->getLocation(), D->getSpecializationKind(), PrevDecl, PrevDecl->getSpecializationKind(), PrevDecl->getPointOfInstantiation(), Ignored)) return nullptr; return VisitVarTemplateSpecializationDecl( InstVarTemplate, D, VarTemplateArgsInfo, Converted, PrevDecl); } Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl( VarTemplateDecl *VarTemplate, VarDecl *D, const TemplateArgumentListInfo &TemplateArgsInfo, ArrayRef Converted, VarTemplateSpecializationDecl *PrevDecl) { // Do substitution on the type of the declaration TypeSourceInfo *DI = SemaRef.SubstType(D->getTypeSourceInfo(), TemplateArgs, D->getTypeSpecStartLoc(), D->getDeclName()); if (!DI) return nullptr; if (DI->getType()->isFunctionType()) { SemaRef.Diag(D->getLocation(), diag::err_variable_instantiates_to_function) << D->isStaticDataMember() << DI->getType(); return nullptr; } // Build the instantiated declaration VarTemplateSpecializationDecl *Var = VarTemplateSpecializationDecl::Create( SemaRef.Context, Owner, D->getInnerLocStart(), D->getLocation(), VarTemplate, DI->getType(), DI, D->getStorageClass(), Converted); Var->setTemplateArgsInfo(TemplateArgsInfo); if (!PrevDecl) { void *InsertPos = nullptr; VarTemplate->findSpecialization(Converted, InsertPos); VarTemplate->AddSpecialization(Var, InsertPos); } if (SemaRef.getLangOpts().OpenCL) SemaRef.deduceOpenCLAddressSpace(Var); // Substitute the nested name specifier, if any. 
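  // Illustrative sketch (hypothetical code) of the function-type diagnostic
  // issued above:
  //
  //   template<typename T>
  //   struct S {
  //     template<typename U> static T v;  // instantiating S<int()> would give
  //   };                                  // 'v' a function type -> diagnosed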
if (SubstQualifier(D, Var)) return nullptr; SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner, StartingScope, false, PrevDecl); return Var; } Decl *TemplateDeclInstantiator::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D) { llvm_unreachable("@defs is not supported in Objective-C++"); } Decl *TemplateDeclInstantiator::VisitFriendTemplateDecl(FriendTemplateDecl *D) { // FIXME: We need to be able to instantiate FriendTemplateDecls. unsigned DiagID = SemaRef.getDiagnostics().getCustomDiagID( DiagnosticsEngine::Error, "cannot instantiate %0 yet"); SemaRef.Diag(D->getLocation(), DiagID) << D->getDeclKindName(); return nullptr; } Decl *TemplateDeclInstantiator::VisitConceptDecl(ConceptDecl *D) { llvm_unreachable("Concept definitions cannot reside inside a template"); } Decl * TemplateDeclInstantiator::VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D) { return RequiresExprBodyDecl::Create(SemaRef.Context, D->getDeclContext(), D->getBeginLoc()); } Decl *TemplateDeclInstantiator::VisitDecl(Decl *D) { llvm_unreachable("Unexpected decl"); } Decl *Sema::SubstDecl(Decl *D, DeclContext *Owner, const MultiLevelTemplateArgumentList &TemplateArgs) { TemplateDeclInstantiator Instantiator(*this, Owner, TemplateArgs); if (D->isInvalidDecl()) return nullptr; Decl *SubstD; runWithSufficientStackSpace(D->getLocation(), [&] { SubstD = Instantiator.Visit(D); }); return SubstD; } void TemplateDeclInstantiator::adjustForRewrite(RewriteKind RK, FunctionDecl *Orig, QualType &T, TypeSourceInfo *&TInfo, DeclarationNameInfo &NameInfo) { assert(RK == RewriteKind::RewriteSpaceshipAsEqualEqual); // C++2a [class.compare.default]p3: // the return type is replaced with bool auto *FPT = T->castAs(); T = SemaRef.Context.getFunctionType( SemaRef.Context.BoolTy, FPT->getParamTypes(), FPT->getExtProtoInfo()); // Update the return type in the source info too. The most straightforward // way is to create new TypeSourceInfo for the new type. Use the location of // the '= default' as the location of the new type. // // FIXME: Set the correct return type when we initially transform the type, // rather than delaying it to now. TypeSourceInfo *NewTInfo = SemaRef.Context.getTrivialTypeSourceInfo(T, Orig->getEndLoc()); auto OldLoc = TInfo->getTypeLoc().getAsAdjusted(); assert(OldLoc && "type of function is not a function type?"); auto NewLoc = NewTInfo->getTypeLoc().castAs(); for (unsigned I = 0, N = OldLoc.getNumParams(); I != N; ++I) NewLoc.setParam(I, OldLoc.getParam(I)); TInfo = NewTInfo; // and the declarator-id is replaced with operator== NameInfo.setName( SemaRef.Context.DeclarationNames.getCXXOperatorName(OO_EqualEqual)); } FunctionDecl *Sema::SubstSpaceshipAsEqualEqual(CXXRecordDecl *RD, FunctionDecl *Spaceship) { if (Spaceship->isInvalidDecl()) return nullptr; // C++2a [class.compare.default]p3: // an == operator function is declared implicitly [...] 
with the same // access and function-definition and in the same class scope as the // three-way comparison operator function MultiLevelTemplateArgumentList NoTemplateArgs; NoTemplateArgs.setKind(TemplateSubstitutionKind::Rewrite); NoTemplateArgs.addOuterRetainedLevels(RD->getTemplateDepth()); TemplateDeclInstantiator Instantiator(*this, RD, NoTemplateArgs); Decl *R; if (auto *MD = dyn_cast(Spaceship)) { R = Instantiator.VisitCXXMethodDecl( MD, nullptr, None, TemplateDeclInstantiator::RewriteKind::RewriteSpaceshipAsEqualEqual); } else { assert(Spaceship->getFriendObjectKind() && "defaulted spaceship is neither a member nor a friend"); R = Instantiator.VisitFunctionDecl( Spaceship, nullptr, TemplateDeclInstantiator::RewriteKind::RewriteSpaceshipAsEqualEqual); if (!R) return nullptr; FriendDecl *FD = FriendDecl::Create(Context, RD, Spaceship->getLocation(), cast(R), Spaceship->getBeginLoc()); FD->setAccess(AS_public); RD->addDecl(FD); } return cast_or_null(R); } /// Instantiates a nested template parameter list in the current /// instantiation context. /// /// \param L The parameter list to instantiate /// /// \returns NULL if there was an error TemplateParameterList * TemplateDeclInstantiator::SubstTemplateParams(TemplateParameterList *L) { // Get errors for all the parameters before bailing out. bool Invalid = false; unsigned N = L->size(); typedef SmallVector ParamVector; ParamVector Params; Params.reserve(N); for (auto &P : *L) { NamedDecl *D = cast_or_null(Visit(P)); Params.push_back(D); Invalid = Invalid || !D || D->isInvalidDecl(); } // Clean up if we had an error. if (Invalid) return nullptr; // FIXME: Concepts: Substitution into requires clause should only happen when // checking satisfaction. Expr *InstRequiresClause = nullptr; if (Expr *E = L->getRequiresClause()) { EnterExpressionEvaluationContext ConstantEvaluated( SemaRef, Sema::ExpressionEvaluationContext::Unevaluated); ExprResult Res = SemaRef.SubstExpr(E, TemplateArgs); if (Res.isInvalid() || !Res.isUsable()) { return nullptr; } InstRequiresClause = Res.get(); } TemplateParameterList *InstL = TemplateParameterList::Create(SemaRef.Context, L->getTemplateLoc(), L->getLAngleLoc(), Params, L->getRAngleLoc(), InstRequiresClause); return InstL; } TemplateParameterList * Sema::SubstTemplateParams(TemplateParameterList *Params, DeclContext *Owner, const MultiLevelTemplateArgumentList &TemplateArgs) { TemplateDeclInstantiator Instantiator(*this, Owner, TemplateArgs); return Instantiator.SubstTemplateParams(Params); } /// Instantiate the declaration of a class template partial /// specialization. /// /// \param ClassTemplate the (instantiated) class template that is partially // specialized by the instantiation of \p PartialSpec. /// /// \param PartialSpec the (uninstantiated) class template partial /// specialization that we are instantiating. /// /// \returns The instantiated partial specialization, if successful; otherwise, /// NULL to indicate an error. ClassTemplatePartialSpecializationDecl * TemplateDeclInstantiator::InstantiateClassTemplatePartialSpecialization( ClassTemplateDecl *ClassTemplate, ClassTemplatePartialSpecializationDecl *PartialSpec) { // Create a local instantiation scope for this class template partial // specialization, which will contain the instantiations of the template // parameters. LocalInstantiationScope Scope(SemaRef); // Substitute into the template parameters of the class template partial // specialization. 
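  // Illustrative sketch (hypothetical code) of the member partial
  // specialization whose parameters and arguments are substituted below:
  //
  //   template<typename T>
  //   struct Outer {
  //     template<typename U> struct Inner;       // primary member template
  //     template<typename U> struct Inner<U*>;   // instantiated along with
  //   };                                         // Outer<T>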
TemplateParameterList *TempParams = PartialSpec->getTemplateParameters(); TemplateParameterList *InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; // Substitute into the template arguments of the class template partial // specialization. const ASTTemplateArgumentListInfo *TemplArgInfo = PartialSpec->getTemplateArgsAsWritten(); TemplateArgumentListInfo InstTemplateArgs(TemplArgInfo->LAngleLoc, TemplArgInfo->RAngleLoc); if (SemaRef.SubstTemplateArguments(TemplArgInfo->arguments(), TemplateArgs, InstTemplateArgs)) return nullptr; // Check that the template argument list is well-formed for this // class template. SmallVector Converted; if (SemaRef.CheckTemplateArgumentList(ClassTemplate, PartialSpec->getLocation(), InstTemplateArgs, false, Converted)) return nullptr; // Check these arguments are valid for a template partial specialization. if (SemaRef.CheckTemplatePartialSpecializationArgs( PartialSpec->getLocation(), ClassTemplate, InstTemplateArgs.size(), Converted)) return nullptr; // Figure out where to insert this class template partial specialization // in the member template's set of class template partial specializations. void *InsertPos = nullptr; ClassTemplateSpecializationDecl *PrevDecl = ClassTemplate->findPartialSpecialization(Converted, InstParams, InsertPos); // Build the canonical type that describes the converted template // arguments of the class template partial specialization. QualType CanonType = SemaRef.Context.getTemplateSpecializationType(TemplateName(ClassTemplate), Converted); // Build the fully-sugared type for this class template // specialization as the user wrote in the specialization // itself. This means that we'll pretty-print the type retrieved // from the specialization's declaration the way that the user // actually wrote the specialization, rather than formatting the // name based on the "canonical" representation used to store the // template arguments in the specialization. TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo( TemplateName(ClassTemplate), PartialSpec->getLocation(), InstTemplateArgs, CanonType); if (PrevDecl) { // We've already seen a partial specialization with the same template // parameters and template arguments. This can happen, for example, when // substituting the outer template arguments ends up causing two // class template partial specializations of a member class template // to have identical forms, e.g., // // template // struct Outer { // template struct Inner; // template struct Inner; // template struct Inner; // }; // // Outer outer; // error: the partial specializations of Inner // // have the same signature. SemaRef.Diag(PartialSpec->getLocation(), diag::err_partial_spec_redeclared) << WrittenTy->getType(); SemaRef.Diag(PrevDecl->getLocation(), diag::note_prev_partial_spec_here) << SemaRef.Context.getTypeDeclType(PrevDecl); return nullptr; } // Create the class template partial specialization declaration. ClassTemplatePartialSpecializationDecl *InstPartialSpec = ClassTemplatePartialSpecializationDecl::Create( SemaRef.Context, PartialSpec->getTagKind(), Owner, PartialSpec->getBeginLoc(), PartialSpec->getLocation(), InstParams, ClassTemplate, Converted, InstTemplateArgs, CanonType, nullptr); // Substitute the nested name specifier, if any. if (SubstQualifier(PartialSpec, InstPartialSpec)) return nullptr; InstPartialSpec->setInstantiatedFromMember(PartialSpec); InstPartialSpec->setTypeAsWritten(WrittenTy); // Check the completed partial specialization. 
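  // Illustrative sketch (hypothetical code) of the variable template
  // counterpart handled next:
  //
  //   template<typename T>
  //   struct Outer {
  //     template<typename U> static constexpr int v = 0;
  //     template<typename U> static constexpr int v<U*> = 1;  // member partial
  //   };                                                      // specialization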
SemaRef.CheckTemplatePartialSpecialization(InstPartialSpec); // Add this partial specialization to the set of class template partial // specializations. ClassTemplate->AddPartialSpecialization(InstPartialSpec, /*InsertPos=*/nullptr); return InstPartialSpec; } /// Instantiate the declaration of a variable template partial /// specialization. /// /// \param VarTemplate the (instantiated) variable template that is partially /// specialized by the instantiation of \p PartialSpec. /// /// \param PartialSpec the (uninstantiated) variable template partial /// specialization that we are instantiating. /// /// \returns The instantiated partial specialization, if successful; otherwise, /// NULL to indicate an error. VarTemplatePartialSpecializationDecl * TemplateDeclInstantiator::InstantiateVarTemplatePartialSpecialization( VarTemplateDecl *VarTemplate, VarTemplatePartialSpecializationDecl *PartialSpec) { // Create a local instantiation scope for this variable template partial // specialization, which will contain the instantiations of the template // parameters. LocalInstantiationScope Scope(SemaRef); // Substitute into the template parameters of the variable template partial // specialization. TemplateParameterList *TempParams = PartialSpec->getTemplateParameters(); TemplateParameterList *InstParams = SubstTemplateParams(TempParams); if (!InstParams) return nullptr; // Substitute into the template arguments of the variable template partial // specialization. const ASTTemplateArgumentListInfo *TemplArgInfo = PartialSpec->getTemplateArgsAsWritten(); TemplateArgumentListInfo InstTemplateArgs(TemplArgInfo->LAngleLoc, TemplArgInfo->RAngleLoc); if (SemaRef.SubstTemplateArguments(TemplArgInfo->arguments(), TemplateArgs, InstTemplateArgs)) return nullptr; // Check that the template argument list is well-formed for this // class template. SmallVector Converted; if (SemaRef.CheckTemplateArgumentList(VarTemplate, PartialSpec->getLocation(), InstTemplateArgs, false, Converted)) return nullptr; // Check these arguments are valid for a template partial specialization. if (SemaRef.CheckTemplatePartialSpecializationArgs( PartialSpec->getLocation(), VarTemplate, InstTemplateArgs.size(), Converted)) return nullptr; // Figure out where to insert this variable template partial specialization // in the member template's set of variable template partial specializations. void *InsertPos = nullptr; VarTemplateSpecializationDecl *PrevDecl = VarTemplate->findPartialSpecialization(Converted, InstParams, InsertPos); // Build the canonical type that describes the converted template // arguments of the variable template partial specialization. QualType CanonType = SemaRef.Context.getTemplateSpecializationType( TemplateName(VarTemplate), Converted); // Build the fully-sugared type for this variable template // specialization as the user wrote in the specialization // itself. This means that we'll pretty-print the type retrieved // from the specialization's declaration the way that the user // actually wrote the specialization, rather than formatting the // name based on the "canonical" representation used to store the // template arguments in the specialization. TypeSourceInfo *WrittenTy = SemaRef.Context.getTemplateSpecializationTypeInfo( TemplateName(VarTemplate), PartialSpec->getLocation(), InstTemplateArgs, CanonType); if (PrevDecl) { // We've already seen a partial specialization with the same template // parameters and template arguments. 
This can happen, for example, when // substituting the outer template arguments ends up causing two // variable template partial specializations of a member variable template // to have identical forms, e.g., // // template // struct Outer { // template pair p; // template pair p; // template pair p; // }; // // Outer outer; // error: the partial specializations of Inner // // have the same signature. SemaRef.Diag(PartialSpec->getLocation(), diag::err_var_partial_spec_redeclared) << WrittenTy->getType(); SemaRef.Diag(PrevDecl->getLocation(), diag::note_var_prev_partial_spec_here); return nullptr; } // Do substitution on the type of the declaration TypeSourceInfo *DI = SemaRef.SubstType( PartialSpec->getTypeSourceInfo(), TemplateArgs, PartialSpec->getTypeSpecStartLoc(), PartialSpec->getDeclName()); if (!DI) return nullptr; if (DI->getType()->isFunctionType()) { SemaRef.Diag(PartialSpec->getLocation(), diag::err_variable_instantiates_to_function) << PartialSpec->isStaticDataMember() << DI->getType(); return nullptr; } // Create the variable template partial specialization declaration. VarTemplatePartialSpecializationDecl *InstPartialSpec = VarTemplatePartialSpecializationDecl::Create( SemaRef.Context, Owner, PartialSpec->getInnerLocStart(), PartialSpec->getLocation(), InstParams, VarTemplate, DI->getType(), DI, PartialSpec->getStorageClass(), Converted, InstTemplateArgs); // Substitute the nested name specifier, if any. if (SubstQualifier(PartialSpec, InstPartialSpec)) return nullptr; InstPartialSpec->setInstantiatedFromMember(PartialSpec); InstPartialSpec->setTypeAsWritten(WrittenTy); // Check the completed partial specialization. SemaRef.CheckTemplatePartialSpecialization(InstPartialSpec); // Add this partial specialization to the set of variable template partial // specializations. The instantiation of the initializer is not necessary. VarTemplate->AddPartialSpecialization(InstPartialSpec, /*InsertPos=*/nullptr); SemaRef.BuildVariableInstantiation(InstPartialSpec, PartialSpec, TemplateArgs, LateAttrs, Owner, StartingScope); return InstPartialSpec; } TypeSourceInfo* TemplateDeclInstantiator::SubstFunctionType(FunctionDecl *D, SmallVectorImpl &Params) { TypeSourceInfo *OldTInfo = D->getTypeSourceInfo(); assert(OldTInfo && "substituting function without type source info"); assert(Params.empty() && "parameter vector is non-empty at start"); CXXRecordDecl *ThisContext = nullptr; Qualifiers ThisTypeQuals; if (CXXMethodDecl *Method = dyn_cast(D)) { ThisContext = cast(Owner); ThisTypeQuals = Method->getMethodQualifiers(); } TypeSourceInfo *NewTInfo = SemaRef.SubstFunctionDeclType(OldTInfo, TemplateArgs, D->getTypeSpecStartLoc(), D->getDeclName(), ThisContext, ThisTypeQuals); if (!NewTInfo) return nullptr; TypeLoc OldTL = OldTInfo->getTypeLoc().IgnoreParens(); if (FunctionProtoTypeLoc OldProtoLoc = OldTL.getAs()) { if (NewTInfo != OldTInfo) { // Get parameters from the new type info. 
TypeLoc NewTL = NewTInfo->getTypeLoc().IgnoreParens(); FunctionProtoTypeLoc NewProtoLoc = NewTL.castAs(); unsigned NewIdx = 0; for (unsigned OldIdx = 0, NumOldParams = OldProtoLoc.getNumParams(); OldIdx != NumOldParams; ++OldIdx) { ParmVarDecl *OldParam = OldProtoLoc.getParam(OldIdx); if (!OldParam) return nullptr; LocalInstantiationScope *Scope = SemaRef.CurrentInstantiationScope; Optional NumArgumentsInExpansion; if (OldParam->isParameterPack()) NumArgumentsInExpansion = SemaRef.getNumArgumentsInExpansion(OldParam->getType(), TemplateArgs); if (!NumArgumentsInExpansion) { // Simple case: normal parameter, or a parameter pack that's // instantiated to a (still-dependent) parameter pack. ParmVarDecl *NewParam = NewProtoLoc.getParam(NewIdx++); Params.push_back(NewParam); Scope->InstantiatedLocal(OldParam, NewParam); } else { // Parameter pack expansion: make the instantiation an argument pack. Scope->MakeInstantiatedLocalArgPack(OldParam); for (unsigned I = 0; I != *NumArgumentsInExpansion; ++I) { ParmVarDecl *NewParam = NewProtoLoc.getParam(NewIdx++); Params.push_back(NewParam); Scope->InstantiatedLocalPackArg(OldParam, NewParam); } } } } else { // The function type itself was not dependent and therefore no // substitution occurred. However, we still need to instantiate // the function parameters themselves. const FunctionProtoType *OldProto = cast(OldProtoLoc.getType()); for (unsigned i = 0, i_end = OldProtoLoc.getNumParams(); i != i_end; ++i) { ParmVarDecl *OldParam = OldProtoLoc.getParam(i); if (!OldParam) { Params.push_back(SemaRef.BuildParmVarDeclForTypedef( D, D->getLocation(), OldProto->getParamType(i))); continue; } ParmVarDecl *Parm = cast_or_null(VisitParmVarDecl(OldParam)); if (!Parm) return nullptr; Params.push_back(Parm); } } } else { // If the type of this function, after ignoring parentheses, is not // *directly* a function type, then we're instantiating a function that // was declared via a typedef or with attributes, e.g., // // typedef int functype(int, int); // functype func; // int __cdecl meth(int, int); // // In this case, we'll just go instantiate the ParmVarDecls that we // synthesized in the method declaration. SmallVector ParamTypes; Sema::ExtParameterInfoBuilder ExtParamInfos; if (SemaRef.SubstParmTypes(D->getLocation(), D->parameters(), nullptr, TemplateArgs, ParamTypes, &Params, ExtParamInfos)) return nullptr; } return NewTInfo; } /// Introduce the instantiated function parameters into the local /// instantiation scope, and set the parameter names to those used /// in the template. static bool addInstantiatedParametersToScope(Sema &S, FunctionDecl *Function, const FunctionDecl *PatternDecl, LocalInstantiationScope &Scope, const MultiLevelTemplateArgumentList &TemplateArgs) { unsigned FParamIdx = 0; for (unsigned I = 0, N = PatternDecl->getNumParams(); I != N; ++I) { const ParmVarDecl *PatternParam = PatternDecl->getParamDecl(I); if (!PatternParam->isParameterPack()) { // Simple case: not a parameter pack. assert(FParamIdx < Function->getNumParams()); ParmVarDecl *FunctionParam = Function->getParamDecl(FParamIdx); FunctionParam->setDeclName(PatternParam->getDeclName()); // If the parameter's type is not dependent, update it to match the type // in the pattern. They can differ in top-level cv-qualifiers, and we want // the pattern's type here. If the type is dependent, they can't differ, // per core issue 1668. Substitute into the type from the pattern, in case // it's instantiation-dependent. // FIXME: Updating the type to work around this is at best fragile. 
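  // Illustrative sketch (hypothetical code): a parameter pack in the pattern
  // maps onto several instantiated parameters, registered as an argument pack:
  //
  //   template<typename... Ts>
  //   void f(Ts... ts);   // instantiating f<int, char> produces two
  //                       // ParmVarDecls, both mapped back to 'ts'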
if (!PatternDecl->getType()->isDependentType()) { QualType T = S.SubstType(PatternParam->getType(), TemplateArgs, FunctionParam->getLocation(), FunctionParam->getDeclName()); if (T.isNull()) return true; FunctionParam->setType(T); } Scope.InstantiatedLocal(PatternParam, FunctionParam); ++FParamIdx; continue; } // Expand the parameter pack. Scope.MakeInstantiatedLocalArgPack(PatternParam); Optional NumArgumentsInExpansion = S.getNumArgumentsInExpansion(PatternParam->getType(), TemplateArgs); if (NumArgumentsInExpansion) { QualType PatternType = PatternParam->getType()->castAs()->getPattern(); for (unsigned Arg = 0; Arg < *NumArgumentsInExpansion; ++Arg) { ParmVarDecl *FunctionParam = Function->getParamDecl(FParamIdx); FunctionParam->setDeclName(PatternParam->getDeclName()); if (!PatternDecl->getType()->isDependentType()) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(S, Arg); QualType T = S.SubstType(PatternType, TemplateArgs, FunctionParam->getLocation(), FunctionParam->getDeclName()); if (T.isNull()) return true; FunctionParam->setType(T); } Scope.InstantiatedLocalPackArg(PatternParam, FunctionParam); ++FParamIdx; } } } return false; } bool Sema::InstantiateDefaultArgument(SourceLocation CallLoc, FunctionDecl *FD, ParmVarDecl *Param) { assert(Param->hasUninstantiatedDefaultArg()); Expr *UninstExpr = Param->getUninstantiatedDefaultArg(); EnterExpressionEvaluationContext EvalContext( *this, ExpressionEvaluationContext::PotentiallyEvaluated, Param); // Instantiate the expression. // // FIXME: Pass in a correct Pattern argument, otherwise // getTemplateInstantiationArgs uses the lexical context of FD, e.g. // // template // struct A { // static int FooImpl(); // // template // // bug: default argument A::FooImpl() is evaluated with 2-level // // template argument list [[T], [Tp]], should be [[Tp]]. // friend A Foo(int a); // }; // // template // A Foo(int a = A::FooImpl()); MultiLevelTemplateArgumentList TemplateArgs = getTemplateInstantiationArgs(FD, nullptr, /*RelativeToPrimary=*/true); InstantiatingTemplate Inst(*this, CallLoc, Param, TemplateArgs.getInnermost()); if (Inst.isInvalid()) return true; if (Inst.isAlreadyInstantiating()) { Diag(Param->getBeginLoc(), diag::err_recursive_default_argument) << FD; Param->setInvalidDecl(); return true; } ExprResult Result; { // C++ [dcl.fct.default]p5: // The names in the [default argument] expression are bound, and // the semantic constraints are checked, at the point where the // default argument expression appears. ContextRAII SavedContext(*this, FD); LocalInstantiationScope Local(*this); FunctionDecl *Pattern = FD->getTemplateInstantiationPattern( /*ForDefinition*/ false); if (addInstantiatedParametersToScope(*this, FD, Pattern, Local, TemplateArgs)) return true; runWithSufficientStackSpace(CallLoc, [&] { Result = SubstInitializer(UninstExpr, TemplateArgs, /*DirectInit*/false); }); } if (Result.isInvalid()) return true; // Check the expression as an initializer for the parameter. 
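  // Illustrative sketch (hypothetical code): default arguments are
  // substituted lazily, at the first call that actually uses them:
  //
  //   template<typename T>
  //   void g(T x, T y = T());   // 'T()' is not instantiated here...
  //
  //   void h() { g(42); }       // ...but here, where 'y' is omitted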
InitializedEntity Entity = InitializedEntity::InitializeParameter(Context, Param); InitializationKind Kind = InitializationKind::CreateCopy( Param->getLocation(), /*FIXME:EqualLoc*/ UninstExpr->getBeginLoc()); Expr *ResultE = Result.getAs(); InitializationSequence InitSeq(*this, Entity, Kind, ResultE); Result = InitSeq.Perform(*this, Entity, Kind, ResultE); if (Result.isInvalid()) return true; Result = ActOnFinishFullExpr(Result.getAs(), Param->getOuterLocStart(), /*DiscardedValue*/ false); if (Result.isInvalid()) return true; // Remember the instantiated default argument. Param->setDefaultArg(Result.getAs()); if (ASTMutationListener *L = getASTMutationListener()) L->DefaultArgumentInstantiated(Param); return false; } void Sema::InstantiateExceptionSpec(SourceLocation PointOfInstantiation, FunctionDecl *Decl) { const FunctionProtoType *Proto = Decl->getType()->castAs(); if (Proto->getExceptionSpecType() != EST_Uninstantiated) return; InstantiatingTemplate Inst(*this, PointOfInstantiation, Decl, InstantiatingTemplate::ExceptionSpecification()); if (Inst.isInvalid()) { // We hit the instantiation depth limit. Clear the exception specification // so that our callers don't have to cope with EST_Uninstantiated. UpdateExceptionSpec(Decl, EST_None); return; } if (Inst.isAlreadyInstantiating()) { // This exception specification indirectly depends on itself. Reject. // FIXME: Corresponding rule in the standard? Diag(PointOfInstantiation, diag::err_exception_spec_cycle) << Decl; UpdateExceptionSpec(Decl, EST_None); return; } // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. Sema::ContextRAII savedContext(*this, Decl); LocalInstantiationScope Scope(*this); MultiLevelTemplateArgumentList TemplateArgs = getTemplateInstantiationArgs(Decl, nullptr, /*RelativeToPrimary*/true); // FIXME: We can't use getTemplateInstantiationPattern(false) in general // here, because for a non-defining friend declaration in a class template, // we don't store enough information to map back to the friend declaration in // the template. FunctionDecl *Template = Proto->getExceptionSpecTemplate(); if (addInstantiatedParametersToScope(*this, Decl, Template, Scope, TemplateArgs)) { UpdateExceptionSpec(Decl, EST_None); return; } SubstExceptionSpec(Decl, Template->getType()->castAs(), TemplateArgs); } bool Sema::CheckInstantiatedFunctionTemplateConstraints( SourceLocation PointOfInstantiation, FunctionDecl *Decl, ArrayRef TemplateArgs, ConstraintSatisfaction &Satisfaction) { // In most cases we're not going to have constraints, so check for that first. FunctionTemplateDecl *Template = Decl->getPrimaryTemplate(); // Note - code synthesis context for the constraints check is created // inside CheckConstraintsSatisfaction. SmallVector TemplateAC; Template->getAssociatedConstraints(TemplateAC); if (TemplateAC.empty()) { Satisfaction.IsSatisfied = true; return false; } // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. Sema::ContextRAII savedContext(*this, Decl); LocalInstantiationScope Scope(*this); // If this is not an explicit specialization - we need to get the instantiated // version of the template arguments and add them to scope for the // substitution. 
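// For example: checking f<int> for
//   template<typename T> requires (sizeof(T) == 4) void f(T);
// substitutes {int} into the associated constraint, with f<int>'s
// instantiated parameters in scope for the substitution.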
if (Decl->isTemplateInstantiation()) { InstantiatingTemplate Inst(*this, Decl->getPointOfInstantiation(), InstantiatingTemplate::ConstraintsCheck{}, Decl->getPrimaryTemplate(), TemplateArgs, SourceRange()); if (Inst.isInvalid()) return true; MultiLevelTemplateArgumentList MLTAL( *Decl->getTemplateSpecializationArgs()); if (addInstantiatedParametersToScope( *this, Decl, Decl->getPrimaryTemplate()->getTemplatedDecl(), Scope, MLTAL)) return true; } Qualifiers ThisQuals; CXXRecordDecl *Record = nullptr; if (auto *Method = dyn_cast<CXXMethodDecl>(Decl)) { ThisQuals = Method->getMethodQualifiers(); Record = Method->getParent(); } CXXThisScopeRAII ThisScope(*this, Record, ThisQuals, Record != nullptr); return CheckConstraintSatisfaction(Template, TemplateAC, TemplateArgs, PointOfInstantiation, Satisfaction); } /// Initializes the common fields of an instantiated function /// declaration (New) from the corresponding fields of its template (Tmpl). /// /// \returns true if there was an error bool TemplateDeclInstantiator::InitFunctionInstantiation(FunctionDecl *New, FunctionDecl *Tmpl) { New->setImplicit(Tmpl->isImplicit()); // Forward the mangling number from the template to the instantiated decl. SemaRef.Context.setManglingNumber(New, SemaRef.Context.getManglingNumber(Tmpl)); // If we are substituting explicitly-specified or deduced template arguments // into a function template and we reach this point, we are now past the // point where SFINAE applies and have committed to keeping the new function // template specialization. We therefore convert the active template // instantiation for the function template into a template instantiation for // this specific function template specialization, which is not a SFINAE // context, so that we diagnose any further errors in the declaration itself. // // FIXME: This is a hack. typedef Sema::CodeSynthesisContext ActiveInstType; ActiveInstType &ActiveInst = SemaRef.CodeSynthesisContexts.back(); if (ActiveInst.Kind == ActiveInstType::ExplicitTemplateArgumentSubstitution || ActiveInst.Kind == ActiveInstType::DeducedTemplateArgumentSubstitution) { if (FunctionTemplateDecl *FunTmpl = dyn_cast<FunctionTemplateDecl>(ActiveInst.Entity)) { assert(FunTmpl->getTemplatedDecl() == Tmpl && "Deduction from the wrong function template?"); (void) FunTmpl; SemaRef.InstantiatingSpecializations.erase( {ActiveInst.Entity->getCanonicalDecl(), ActiveInst.Kind}); atTemplateEnd(SemaRef.TemplateInstCallbacks, SemaRef, ActiveInst); ActiveInst.Kind = ActiveInstType::TemplateInstantiation; ActiveInst.Entity = New; atTemplateBegin(SemaRef.TemplateInstCallbacks, SemaRef, ActiveInst); } } const FunctionProtoType *Proto = Tmpl->getType()->getAs<FunctionProtoType>(); assert(Proto && "Function template without prototype?"); if (Proto->hasExceptionSpec() || Proto->getNoReturnAttr()) { FunctionProtoType::ExtProtoInfo EPI = Proto->getExtProtoInfo(); // DR1330: In C++11, defer instantiation of a non-trivial // exception specification. // DR1484: Local classes and their members are instantiated along with the // containing function.
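// For example: for
//   template<typename T> struct A { void f() noexcept(noexcept(T())); };
// instantiating A<X> does not evaluate noexcept(X()); the instantiated f()
// gets an EST_Uninstantiated specification below, and the predicate is only
// evaluated once the exception specification is actually needed.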
if (SemaRef.getLangOpts().CPlusPlus11 && EPI.ExceptionSpec.Type != EST_None && EPI.ExceptionSpec.Type != EST_DynamicNone && EPI.ExceptionSpec.Type != EST_BasicNoexcept && !Tmpl->isInLocalScopeForInstantiation()) { FunctionDecl *ExceptionSpecTemplate = Tmpl; if (EPI.ExceptionSpec.Type == EST_Uninstantiated) ExceptionSpecTemplate = EPI.ExceptionSpec.SourceTemplate; ExceptionSpecificationType NewEST = EST_Uninstantiated; if (EPI.ExceptionSpec.Type == EST_Unevaluated) NewEST = EST_Unevaluated; // Mark the function as having an uninstantiated exception specification. const FunctionProtoType *NewProto = New->getType()->getAs<FunctionProtoType>(); assert(NewProto && "Template instantiation without function prototype?"); EPI = NewProto->getExtProtoInfo(); EPI.ExceptionSpec.Type = NewEST; EPI.ExceptionSpec.SourceDecl = New; EPI.ExceptionSpec.SourceTemplate = ExceptionSpecTemplate; New->setType(SemaRef.Context.getFunctionType( NewProto->getReturnType(), NewProto->getParamTypes(), EPI)); } else { Sema::ContextRAII SwitchContext(SemaRef, New); SemaRef.SubstExceptionSpec(New, Proto, TemplateArgs); } } // Get the definition. Leaves the variable unchanged if undefined. const FunctionDecl *Definition = Tmpl; Tmpl->isDefined(Definition); SemaRef.InstantiateAttrs(TemplateArgs, Definition, New, LateAttrs, StartingScope); return false; } /// Initializes common fields of an instantiated method /// declaration (New) from the corresponding fields of its template /// (Tmpl). /// /// \returns true if there was an error bool TemplateDeclInstantiator::InitMethodInstantiation(CXXMethodDecl *New, CXXMethodDecl *Tmpl) { if (InitFunctionInstantiation(New, Tmpl)) return true; if (isa<CXXDestructorDecl>(New) && SemaRef.getLangOpts().CPlusPlus11) SemaRef.AdjustDestructorExceptionSpec(cast<CXXDestructorDecl>(New)); New->setAccess(Tmpl->getAccess()); if (Tmpl->isVirtualAsWritten()) New->setVirtualAsWritten(true); // FIXME: New needs a pointer to Tmpl return false; } bool TemplateDeclInstantiator::SubstDefaultedFunction(FunctionDecl *New, FunctionDecl *Tmpl) { // Transfer across any unqualified lookups. if (auto *DFI = Tmpl->getDefaultedFunctionInfo()) { SmallVector<DeclAccessPair, 32> Lookups; Lookups.reserve(DFI->getUnqualifiedLookups().size()); bool AnyChanged = false; for (DeclAccessPair DA : DFI->getUnqualifiedLookups()) { NamedDecl *D = SemaRef.FindInstantiatedDecl(New->getLocation(), DA.getDecl(), TemplateArgs); if (!D) return true; AnyChanged |= (D != DA.getDecl()); Lookups.push_back(DeclAccessPair::make(D, DA.getAccess())); } // It's unlikely that substitution will change any declarations. Don't // store an unnecessary copy in that case. New->setDefaultedFunctionInfo( AnyChanged ? FunctionDecl::DefaultedFunctionInfo::Create( SemaRef.Context, Lookups) : DFI); } SemaRef.SetDeclDefaulted(New, Tmpl->getLocation()); return false; } /// Instantiate (or find existing instantiation of) a function template with a /// given set of template arguments. /// /// Usually this should not be used, and template argument deduction should be /// used in its place.
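///
/// A sketch of the intended use (hypothetical caller code):
/// \code
///   // Given `template<typename T> T id(T);` as FTD and the explicit
///   // argument list {int}, produce (or reuse) the declaration of id<int>.
///   FunctionDecl *Spec = S.InstantiateFunctionDeclaration(FTD, Args, Loc);
/// \endcode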
FunctionDecl * Sema::InstantiateFunctionDeclaration(FunctionTemplateDecl *FTD, const TemplateArgumentList *Args, SourceLocation Loc) { FunctionDecl *FD = FTD->getTemplatedDecl(); sema::TemplateDeductionInfo Info(Loc); InstantiatingTemplate Inst( *this, Loc, FTD, Args->asArray(), CodeSynthesisContext::ExplicitTemplateArgumentSubstitution, Info); if (Inst.isInvalid()) return nullptr; ContextRAII SavedContext(*this, FD); MultiLevelTemplateArgumentList MArgs(*Args); return cast_or_null(SubstDecl(FD, FD->getParent(), MArgs)); } /// Instantiate the definition of the given function from its /// template. /// /// \param PointOfInstantiation the point at which the instantiation was /// required. Note that this is not precisely a "point of instantiation" /// for the function, but it's close. /// /// \param Function the already-instantiated declaration of a /// function template specialization or member function of a class template /// specialization. /// /// \param Recursive if true, recursively instantiates any functions that /// are required by this instantiation. /// /// \param DefinitionRequired if true, then we are performing an explicit /// instantiation where the body of the function is required. Complain if /// there is no such body. void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, FunctionDecl *Function, bool Recursive, bool DefinitionRequired, bool AtEndOfTU) { if (Function->isInvalidDecl() || isa(Function)) return; // Never instantiate an explicit specialization except if it is a class scope // explicit specialization. TemplateSpecializationKind TSK = Function->getTemplateSpecializationKindForInstantiation(); if (TSK == TSK_ExplicitSpecialization) return; // Don't instantiate a definition if we already have one. const FunctionDecl *ExistingDefn = nullptr; if (Function->isDefined(ExistingDefn, /*CheckForPendingFriendDefinition=*/true)) { if (ExistingDefn->isThisDeclarationADefinition()) return; // If we're asked to instantiate a function whose body comes from an // instantiated friend declaration, attach the instantiated body to the // corresponding declaration of the function. assert(ExistingDefn->isThisDeclarationInstantiatedFromAFriendDefinition()); Function = const_cast(ExistingDefn); } // Find the function body that we'll be substituting. const FunctionDecl *PatternDecl = Function->getTemplateInstantiationPattern(); assert(PatternDecl && "instantiating a non-template"); const FunctionDecl *PatternDef = PatternDecl->getDefinition(); Stmt *Pattern = nullptr; if (PatternDef) { Pattern = PatternDef->getBody(PatternDef); PatternDecl = PatternDef; if (PatternDef->willHaveBody()) PatternDef = nullptr; } // FIXME: We need to track the instantiation stack in order to know which // definitions should be visible within this instantiation. if (DiagnoseUninstantiableTemplate(PointOfInstantiation, Function, Function->getInstantiatedFromMemberFunction(), PatternDecl, PatternDef, TSK, /*Complain*/DefinitionRequired)) { if (DefinitionRequired) Function->setInvalidDecl(); else if (TSK == TSK_ExplicitInstantiationDefinition) { // Try again at the end of the translation unit (at which point a // definition will be required). 
assert(!Recursive); Function->setInstantiationIsPending(true); PendingInstantiations.push_back( std::make_pair(Function, PointOfInstantiation)); } else if (TSK == TSK_ImplicitInstantiation) { if (AtEndOfTU && !getDiagnostics().hasErrorOccurred() && !getSourceManager().isInSystemHeader(PatternDecl->getBeginLoc())) { Diag(PointOfInstantiation, diag::warn_func_template_missing) << Function; Diag(PatternDecl->getLocation(), diag::note_forward_template_decl); if (getLangOpts().CPlusPlus11) Diag(PointOfInstantiation, diag::note_inst_declaration_hint) << Function; } } return; } // Postpone late parsed template instantiations. if (PatternDecl->isLateTemplateParsed() && !LateTemplateParser) { Function->setInstantiationIsPending(true); LateParsedInstantiations.push_back( std::make_pair(Function, PointOfInstantiation)); return; } llvm::TimeTraceScope TimeScope("InstantiateFunction", [&]() { std::string Name; llvm::raw_string_ostream OS(Name); Function->getNameForDiagnostic(OS, getPrintingPolicy(), /*Qualified=*/true); return Name; }); // If we're performing recursive template instantiation, create our own // queue of pending implicit instantiations that we will instantiate later, // while we're still within our own instantiation context. // This has to happen before LateTemplateParser below is called, so that // it marks vtables used in late parsed templates as used. GlobalEagerInstantiationScope GlobalInstantiations(*this, /*Enabled=*/Recursive); LocalEagerInstantiationScope LocalInstantiations(*this); // Call the LateTemplateParser callback if there is a need to late parse // a templated function definition. if (!Pattern && PatternDecl->isLateTemplateParsed() && LateTemplateParser) { // FIXME: Optimize to allow individual templates to be deserialized. if (PatternDecl->isFromASTFile()) ExternalSource->ReadLateParsedTemplates(LateParsedTemplateMap); auto LPTIter = LateParsedTemplateMap.find(PatternDecl); assert(LPTIter != LateParsedTemplateMap.end() && "missing LateParsedTemplate"); LateTemplateParser(OpaqueParser, *LPTIter->second); Pattern = PatternDecl->getBody(PatternDecl); } // Note, we should never try to instantiate a deleted function template. assert((Pattern || PatternDecl->isDefaulted() || PatternDecl->hasSkippedBody()) && "unexpected kind of function template definition"); // C++1y [temp.explicit]p10: // Except for inline functions, declarations with types deduced from their // initializer or return value, and class template specializations, other // explicit instantiation declarations have the effect of suppressing the // implicit instantiation of the entity to which they refer. if (TSK == TSK_ExplicitInstantiationDeclaration && !PatternDecl->isInlined() && !PatternDecl->getReturnType()->getContainedAutoType()) return; if (PatternDecl->isInlined()) { // Function, and all later redeclarations of it (from imported modules, // for instance), are now implicitly inline. for (auto *D = Function->getMostRecentDecl(); /**/; D = D->getPreviousDecl()) { D->setImplicitlyInline(); if (D == Function) break; } } InstantiatingTemplate Inst(*this, PointOfInstantiation, Function); if (Inst.isInvalid() || Inst.isAlreadyInstantiating()) return; PrettyDeclStackTraceEntry CrashInfo(Context, Function, SourceLocation(), "instantiating function definition"); // The instantiation is visible here, even if it was first declared in an // unimported module. Function->setVisibleDespiteOwningModule(); // Copy the inner loc start from the pattern. 
Function->setInnerLocStart(PatternDecl->getInnerLocStart()); EnterExpressionEvaluationContext EvalContext( *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); // Introduce a new scope where local variable instantiations will be // recorded, unless we're actually a member function within a local // class, in which case we need to merge our results with the parent // scope (of the enclosing function). The exception is instantiating // a function template specialization, since the template to be // instantiated already has references to locals properly substituted. bool MergeWithParentScope = false; if (CXXRecordDecl *Rec = dyn_cast(Function->getDeclContext())) MergeWithParentScope = Rec->isLocalClass() && !Function->isFunctionTemplateSpecialization(); LocalInstantiationScope Scope(*this, MergeWithParentScope); auto RebuildTypeSourceInfoForDefaultSpecialMembers = [&]() { // Special members might get their TypeSourceInfo set up w.r.t the // PatternDecl context, in which case parameters could still be pointing // back to the original class, make sure arguments are bound to the // instantiated record instead. assert(PatternDecl->isDefaulted() && "Special member needs to be defaulted"); auto PatternSM = getDefaultedFunctionKind(PatternDecl).asSpecialMember(); if (!(PatternSM == Sema::CXXCopyConstructor || PatternSM == Sema::CXXCopyAssignment || PatternSM == Sema::CXXMoveConstructor || PatternSM == Sema::CXXMoveAssignment)) return; auto *NewRec = dyn_cast(Function->getDeclContext()); const auto *PatternRec = dyn_cast(PatternDecl->getDeclContext()); if (!NewRec || !PatternRec) return; if (!PatternRec->isLambda()) return; struct SpecialMemberTypeInfoRebuilder : TreeTransform { using Base = TreeTransform; const CXXRecordDecl *OldDecl; CXXRecordDecl *NewDecl; SpecialMemberTypeInfoRebuilder(Sema &SemaRef, const CXXRecordDecl *O, CXXRecordDecl *N) : TreeTransform(SemaRef), OldDecl(O), NewDecl(N) {} bool TransformExceptionSpec(SourceLocation Loc, FunctionProtoType::ExceptionSpecInfo &ESI, SmallVectorImpl &Exceptions, bool &Changed) { return false; } QualType TransformRecordType(TypeLocBuilder &TLB, RecordTypeLoc TL) { const RecordType *T = TL.getTypePtr(); RecordDecl *Record = cast_or_null( getDerived().TransformDecl(TL.getNameLoc(), T->getDecl())); if (Record != OldDecl) return Base::TransformRecordType(TLB, TL); QualType Result = getDerived().RebuildRecordType(NewDecl); if (Result.isNull()) return QualType(); RecordTypeLoc NewTL = TLB.push(Result); NewTL.setNameLoc(TL.getNameLoc()); return Result; } } IR{*this, PatternRec, NewRec}; TypeSourceInfo *NewSI = IR.TransformType(Function->getTypeSourceInfo()); Function->setType(NewSI->getType()); Function->setTypeSourceInfo(NewSI); ParmVarDecl *Parm = Function->getParamDecl(0); TypeSourceInfo *NewParmSI = IR.TransformType(Parm->getTypeSourceInfo()); Parm->setType(NewParmSI->getType()); Parm->setTypeSourceInfo(NewParmSI); }; if (PatternDecl->isDefaulted()) { RebuildTypeSourceInfoForDefaultSpecialMembers(); SetDeclDefaulted(Function, PatternDecl->getLocation()); } else { MultiLevelTemplateArgumentList TemplateArgs = getTemplateInstantiationArgs(Function, nullptr, false, PatternDecl); // Substitute into the qualifier; we can get a substitution failure here // through evil use of alias templates. // FIXME: Is CurContext correct for this? Should we go to the (instantiation // of the) lexical context of the pattern? 
SubstQualifier(*this, PatternDecl, Function, TemplateArgs); ActOnStartOfFunctionDef(nullptr, Function); // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. Sema::ContextRAII savedContext(*this, Function); if (addInstantiatedParametersToScope(*this, Function, PatternDecl, Scope, TemplateArgs)) return; StmtResult Body; if (PatternDecl->hasSkippedBody()) { ActOnSkippedFunctionBody(Function); Body = nullptr; } else { if (CXXConstructorDecl *Ctor = dyn_cast<CXXConstructorDecl>(Function)) { // If this is a constructor, instantiate the member initializers. InstantiateMemInitializers(Ctor, cast<CXXConstructorDecl>(PatternDecl), TemplateArgs); // If this is an MS ABI dllexport default constructor, instantiate any // default arguments. if (Context.getTargetInfo().getCXXABI().isMicrosoft() && Ctor->isDefaultConstructor()) { InstantiateDefaultCtorDefaultArgs(Ctor); } } // Instantiate the function body. Body = SubstStmt(Pattern, TemplateArgs); if (Body.isInvalid()) Function->setInvalidDecl(); } // FIXME: finishing the function body while in an expression evaluation // context seems wrong. Investigate more. ActOnFinishFunctionBody(Function, Body.get(), /*IsInstantiation=*/true); PerformDependentDiagnostics(PatternDecl, TemplateArgs); if (auto *Listener = getASTMutationListener()) Listener->FunctionDefinitionInstantiated(Function); savedContext.pop(); } DeclGroupRef DG(Function); Consumer.HandleTopLevelDecl(DG); // This class may have local implicit instantiations that need to be // instantiated within this scope. LocalInstantiations.perform(); Scope.Exit(); GlobalInstantiations.perform(); } VarTemplateSpecializationDecl *Sema::BuildVarTemplateInstantiation( VarTemplateDecl *VarTemplate, VarDecl *FromVar, const TemplateArgumentList &TemplateArgList, const TemplateArgumentListInfo &TemplateArgsInfo, SmallVectorImpl<TemplateArgument> &Converted, SourceLocation PointOfInstantiation, LateInstantiatedAttrVec *LateAttrs, LocalInstantiationScope *StartingScope) { if (FromVar->isInvalidDecl()) return nullptr; InstantiatingTemplate Inst(*this, PointOfInstantiation, FromVar); if (Inst.isInvalid()) return nullptr; MultiLevelTemplateArgumentList TemplateArgLists; TemplateArgLists.addOuterTemplateArguments(&TemplateArgList); // Instantiate the first declaration of the variable template: for a partial // specialization of a static data member template, the first declaration may // or may not be the declaration in the class; if it's in the class, we want // to instantiate a member in the class (a declaration), and if it's outside, // we want to instantiate a definition. // // If we're instantiating an explicitly-specialized member template or member // partial specialization, don't do this. The member specialization completely // replaces the original declaration in this case. bool IsMemberSpec = false; if (VarTemplatePartialSpecializationDecl *PartialSpec = dyn_cast<VarTemplatePartialSpecializationDecl>(FromVar)) IsMemberSpec = PartialSpec->isMemberSpecialization(); else if (VarTemplateDecl *FromTemplate = FromVar->getDescribedVarTemplate()) IsMemberSpec = FromTemplate->isMemberSpecialization(); if (!IsMemberSpec) FromVar = FromVar->getFirstDecl(); MultiLevelTemplateArgumentList MultiLevelList(TemplateArgList); TemplateDeclInstantiator Instantiator(*this, FromVar->getDeclContext(), MultiLevelList); // TODO: Set LateAttrs and StartingScope ...
return cast_or_null( Instantiator.VisitVarTemplateSpecializationDecl( VarTemplate, FromVar, TemplateArgsInfo, Converted)); } /// Instantiates a variable template specialization by completing it /// with appropriate type information and initializer. VarTemplateSpecializationDecl *Sema::CompleteVarTemplateSpecializationDecl( VarTemplateSpecializationDecl *VarSpec, VarDecl *PatternDecl, const MultiLevelTemplateArgumentList &TemplateArgs) { assert(PatternDecl->isThisDeclarationADefinition() && "don't have a definition to instantiate from"); // Do substitution on the type of the declaration TypeSourceInfo *DI = SubstType(PatternDecl->getTypeSourceInfo(), TemplateArgs, PatternDecl->getTypeSpecStartLoc(), PatternDecl->getDeclName()); if (!DI) return nullptr; // Update the type of this variable template specialization. VarSpec->setType(DI->getType()); // Convert the declaration into a definition now. VarSpec->setCompleteDefinition(); // Instantiate the initializer. InstantiateVariableInitializer(VarSpec, PatternDecl, TemplateArgs); if (getLangOpts().OpenCL) deduceOpenCLAddressSpace(VarSpec); return VarSpec; } /// BuildVariableInstantiation - Used after a new variable has been created. /// Sets basic variable data and decides whether to postpone the /// variable instantiation. void Sema::BuildVariableInstantiation( VarDecl *NewVar, VarDecl *OldVar, const MultiLevelTemplateArgumentList &TemplateArgs, LateInstantiatedAttrVec *LateAttrs, DeclContext *Owner, LocalInstantiationScope *StartingScope, bool InstantiatingVarTemplate, VarTemplateSpecializationDecl *PrevDeclForVarTemplateSpecialization) { // Instantiating a partial specialization to produce a partial // specialization. bool InstantiatingVarTemplatePartialSpec = isa(OldVar) && isa(NewVar); // Instantiating from a variable template (or partial specialization) to // produce a variable template specialization. bool InstantiatingSpecFromTemplate = isa(NewVar) && (OldVar->getDescribedVarTemplate() || isa(OldVar)); // If we are instantiating a local extern declaration, the // instantiation belongs lexically to the containing function. // If we are instantiating a static data member defined // out-of-line, the instantiation will have the same lexical // context (which will be a namespace scope) as the template. if (OldVar->isLocalExternDecl()) { NewVar->setLocalExternDecl(); NewVar->setLexicalDeclContext(Owner); } else if (OldVar->isOutOfLine()) NewVar->setLexicalDeclContext(OldVar->getLexicalDeclContext()); NewVar->setTSCSpec(OldVar->getTSCSpec()); NewVar->setInitStyle(OldVar->getInitStyle()); NewVar->setCXXForRangeDecl(OldVar->isCXXForRangeDecl()); NewVar->setObjCForDecl(OldVar->isObjCForDecl()); NewVar->setConstexpr(OldVar->isConstexpr()); NewVar->setInitCapture(OldVar->isInitCapture()); NewVar->setPreviousDeclInSameBlockScope( OldVar->isPreviousDeclInSameBlockScope()); NewVar->setAccess(OldVar->getAccess()); if (!OldVar->isStaticDataMember()) { if (OldVar->isUsed(false)) NewVar->setIsUsed(); NewVar->setReferenced(OldVar->isReferenced()); } InstantiateAttrs(TemplateArgs, OldVar, NewVar, LateAttrs, StartingScope); LookupResult Previous( *this, NewVar->getDeclName(), NewVar->getLocation(), NewVar->isLocalExternDecl() ? Sema::LookupRedeclarationWithLinkage : Sema::LookupOrdinaryName, NewVar->isLocalExternDecl() ? 
Sema::ForExternalRedeclaration : forRedeclarationInCurContext()); if (NewVar->isLocalExternDecl() && OldVar->getPreviousDecl() && (!OldVar->getPreviousDecl()->getDeclContext()->isDependentContext() || OldVar->getPreviousDecl()->getDeclContext()==OldVar->getDeclContext())) { // We have a previous declaration. Use that one, so we merge with the // right type. if (NamedDecl *NewPrev = FindInstantiatedDecl( NewVar->getLocation(), OldVar->getPreviousDecl(), TemplateArgs)) Previous.addDecl(NewPrev); } else if (!isa(NewVar) && OldVar->hasLinkage()) { LookupQualifiedName(Previous, NewVar->getDeclContext(), false); } else if (PrevDeclForVarTemplateSpecialization) { Previous.addDecl(PrevDeclForVarTemplateSpecialization); } CheckVariableDeclaration(NewVar, Previous); if (!InstantiatingVarTemplate) { NewVar->getLexicalDeclContext()->addHiddenDecl(NewVar); if (!NewVar->isLocalExternDecl() || !NewVar->getPreviousDecl()) NewVar->getDeclContext()->makeDeclVisibleInContext(NewVar); } if (!OldVar->isOutOfLine()) { if (NewVar->getDeclContext()->isFunctionOrMethod()) CurrentInstantiationScope->InstantiatedLocal(OldVar, NewVar); } // Link instantiations of static data members back to the template from // which they were instantiated. // // Don't do this when instantiating a template (we link the template itself // back in that case) nor when instantiating a static data member template // (that's not a member specialization). if (NewVar->isStaticDataMember() && !InstantiatingVarTemplate && !InstantiatingSpecFromTemplate) NewVar->setInstantiationOfStaticDataMember(OldVar, TSK_ImplicitInstantiation); // If the pattern is an (in-class) explicit specialization, then the result // is also an explicit specialization. if (VarTemplateSpecializationDecl *OldVTSD = dyn_cast(OldVar)) { if (OldVTSD->getSpecializationKind() == TSK_ExplicitSpecialization && !isa(OldVTSD)) cast(NewVar)->setSpecializationKind( TSK_ExplicitSpecialization); } // Forward the mangling number from the template to the instantiated decl. Context.setManglingNumber(NewVar, Context.getManglingNumber(OldVar)); Context.setStaticLocalNumber(NewVar, Context.getStaticLocalNumber(OldVar)); // Figure out whether to eagerly instantiate the initializer. if (InstantiatingVarTemplate || InstantiatingVarTemplatePartialSpec) { // We're producing a template. Don't instantiate the initializer yet. } else if (NewVar->getType()->isUndeducedType()) { // We need the type to complete the declaration of the variable. InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs); } else if (InstantiatingSpecFromTemplate || (OldVar->isInline() && OldVar->isThisDeclarationADefinition() && !NewVar->isThisDeclarationADefinition())) { // Delay instantiation of the initializer for variable template // specializations or inline static data members until a definition of the // variable is needed. } else { InstantiateVariableInitializer(NewVar, OldVar, TemplateArgs); } // Diagnose unused local variables with dependent types, where the diagnostic // will have been deferred. if (!NewVar->isInvalidDecl() && NewVar->getDeclContext()->isFunctionOrMethod() && OldVar->getType()->isDependentType()) DiagnoseUnusedDecl(NewVar); } /// Instantiate the initializer of a variable. 
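/// For example: for
/// \code
///   template<typename T> struct S { static constexpr int x = sizeof(T); };
/// \endcode
/// instantiating S<int>::x substitutes the initializer `sizeof(T)` to
/// `sizeof(int)` and attaches the result to the instantiated variable.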
void Sema::InstantiateVariableInitializer( VarDecl *Var, VarDecl *OldVar, const MultiLevelTemplateArgumentList &TemplateArgs) { if (ASTMutationListener *L = getASTContext().getASTMutationListener()) L->VariableDefinitionInstantiated(Var); // We propagate the 'inline' flag with the initializer, because it // would otherwise imply that the variable is a definition for a // non-static data member. if (OldVar->isInlineSpecified()) Var->setInlineSpecified(); else if (OldVar->isInline()) Var->setImplicitlyInline(); if (OldVar->getInit()) { EnterExpressionEvaluationContext Evaluated( *this, Sema::ExpressionEvaluationContext::PotentiallyEvaluated, Var); // Instantiate the initializer. ExprResult Init; { ContextRAII SwitchContext(*this, Var->getDeclContext()); Init = SubstInitializer(OldVar->getInit(), TemplateArgs, OldVar->getInitStyle() == VarDecl::CallInit); } if (!Init.isInvalid()) { Expr *InitExpr = Init.get(); if (Var->hasAttr() && (!InitExpr || !InitExpr->isConstantInitializer(getASTContext(), false))) { // Do not dynamically initialize dllimport variables. } else if (InitExpr) { bool DirectInit = OldVar->isDirectInit(); AddInitializerToDecl(Var, InitExpr, DirectInit); } else ActOnUninitializedDecl(Var); } else { // FIXME: Not too happy about invalidating the declaration // because of a bogus initializer. Var->setInvalidDecl(); } } else { // `inline` variables are a definition and declaration all in one; we won't // pick up an initializer from anywhere else. if (Var->isStaticDataMember() && !Var->isInline()) { if (!Var->isOutOfLine()) return; // If the declaration inside the class had an initializer, don't add // another one to the out-of-line definition. if (OldVar->getFirstDecl()->hasInit()) return; } // We'll add an initializer to a for-range declaration later. if (Var->isCXXForRangeDecl() || Var->isObjCForDecl()) return; ActOnUninitializedDecl(Var); } if (getLangOpts().CUDA) checkAllowedCUDAInitializer(Var); } /// Instantiate the definition of the given variable from its /// template. /// /// \param PointOfInstantiation the point at which the instantiation was /// required. Note that this is not precisely a "point of instantiation" /// for the variable, but it's close. /// /// \param Var the already-instantiated declaration of a templated variable. /// /// \param Recursive if true, recursively instantiates any functions that /// are required by this instantiation. /// /// \param DefinitionRequired if true, then we are performing an explicit /// instantiation where a definition of the variable is required. Complain /// if there is no such definition. void Sema::InstantiateVariableDefinition(SourceLocation PointOfInstantiation, VarDecl *Var, bool Recursive, bool DefinitionRequired, bool AtEndOfTU) { if (Var->isInvalidDecl()) return; // Never instantiate an explicitly-specialized entity. TemplateSpecializationKind TSK = Var->getTemplateSpecializationKindForInstantiation(); if (TSK == TSK_ExplicitSpecialization) return; // Find the pattern and the arguments to substitute into it. VarDecl *PatternDecl = Var->getTemplateInstantiationPattern(); assert(PatternDecl && "no pattern for templated variable"); MultiLevelTemplateArgumentList TemplateArgs = getTemplateInstantiationArgs(Var); VarTemplateSpecializationDecl *VarSpec = dyn_cast(Var); if (VarSpec) { // If this is a static data member template, there might be an // uninstantiated initializer on the declaration. If so, instantiate // it now. // // FIXME: This largely duplicates what we would do below. 
The difference // is that along this path we may instantiate an initializer from an // in-class declaration of the template and instantiate the definition // from a separate out-of-class definition. if (PatternDecl->isStaticDataMember() && (PatternDecl = PatternDecl->getFirstDecl())->hasInit() && !Var->hasInit()) { // FIXME: Factor out the duplicated instantiation context setup/tear down // code here. InstantiatingTemplate Inst(*this, PointOfInstantiation, Var); if (Inst.isInvalid() || Inst.isAlreadyInstantiating()) return; PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(), "instantiating variable initializer"); // The instantiation is visible here, even if it was first declared in an // unimported module. Var->setVisibleDespiteOwningModule(); // If we're performing recursive template instantiation, create our own // queue of pending implicit instantiations that we will instantiate // later, while we're still within our own instantiation context. GlobalEagerInstantiationScope GlobalInstantiations(*this, /*Enabled=*/Recursive); LocalInstantiationScope Local(*this); LocalEagerInstantiationScope LocalInstantiations(*this); // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. ContextRAII PreviousContext(*this, Var->getDeclContext()); InstantiateVariableInitializer(Var, PatternDecl, TemplateArgs); PreviousContext.pop(); // This variable may have local implicit instantiations that need to be // instantiated within this scope. LocalInstantiations.perform(); Local.Exit(); GlobalInstantiations.perform(); } } else { assert(Var->isStaticDataMember() && PatternDecl->isStaticDataMember() && "not a static data member?"); } VarDecl *Def = PatternDecl->getDefinition(getASTContext()); // If we don't have a definition of the variable template, we won't perform // any instantiation. Rather, we rely on the user to instantiate this // definition (or provide a specialization for it) in another translation // unit. if (!Def && !DefinitionRequired) { if (TSK == TSK_ExplicitInstantiationDefinition) { PendingInstantiations.push_back( std::make_pair(Var, PointOfInstantiation)); } else if (TSK == TSK_ImplicitInstantiation) { // Warn about missing definition at the end of translation unit. if (AtEndOfTU && !getDiagnostics().hasErrorOccurred() && !getSourceManager().isInSystemHeader(PatternDecl->getBeginLoc())) { Diag(PointOfInstantiation, diag::warn_var_template_missing) << Var; Diag(PatternDecl->getLocation(), diag::note_forward_template_decl); if (getLangOpts().CPlusPlus11) Diag(PointOfInstantiation, diag::note_inst_declaration_hint) << Var; } return; } } // FIXME: We need to track the instantiation stack in order to know which // definitions should be visible within this instantiation. // FIXME: Produce diagnostics when Var->getInstantiatedFromStaticDataMember(). if (DiagnoseUninstantiableTemplate(PointOfInstantiation, Var, /*InstantiatedFromMember*/false, PatternDecl, Def, TSK, /*Complain*/DefinitionRequired)) return; // C++11 [temp.explicit]p10: // Except for inline functions, const variables of literal types, variables // of reference types, [...] explicit instantiation declarations // have the effect of suppressing the implicit instantiation of the entity // to which they refer. // // FIXME: That's not exactly the same as "might be usable in constant // expressions", which only allows constexpr variables and const integral // types, not arbitrary const literal types. 
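// For example: given `template<typename T> constexpr T zero = T(0);`, an
// `extern template` declaration of zero<int> does not suppress instantiation
// of its definition, because zero<int> remains usable in constant
// expressions.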
if (TSK == TSK_ExplicitInstantiationDeclaration && !Var->mightBeUsableInConstantExpressions(getASTContext())) return; // Make sure to pass the instantiated variable to the consumer at the end. struct PassToConsumerRAII { ASTConsumer &Consumer; VarDecl *Var; PassToConsumerRAII(ASTConsumer &Consumer, VarDecl *Var) : Consumer(Consumer), Var(Var) { } ~PassToConsumerRAII() { Consumer.HandleCXXStaticMemberVarInstantiation(Var); } } PassToConsumerRAII(Consumer, Var); // If we already have a definition, we're done. if (VarDecl *Def = Var->getDefinition()) { // We may be explicitly instantiating something we've already implicitly // instantiated. Def->setTemplateSpecializationKind(Var->getTemplateSpecializationKind(), PointOfInstantiation); return; } InstantiatingTemplate Inst(*this, PointOfInstantiation, Var); if (Inst.isInvalid() || Inst.isAlreadyInstantiating()) return; PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(), "instantiating variable definition"); // If we're performing recursive template instantiation, create our own // queue of pending implicit instantiations that we will instantiate later, // while we're still within our own instantiation context. GlobalEagerInstantiationScope GlobalInstantiations(*this, /*Enabled=*/Recursive); // Enter the scope of this instantiation. We don't use // PushDeclContext because we don't have a scope. ContextRAII PreviousContext(*this, Var->getDeclContext()); LocalInstantiationScope Local(*this); LocalEagerInstantiationScope LocalInstantiations(*this); VarDecl *OldVar = Var; if (Def->isStaticDataMember() && !Def->isOutOfLine()) { // We're instantiating an inline static data member whose definition was // provided inside the class. InstantiateVariableInitializer(Var, Def, TemplateArgs); } else if (!VarSpec) { Var = cast_or_null(SubstDecl(Def, Var->getDeclContext(), TemplateArgs)); } else if (Var->isStaticDataMember() && Var->getLexicalDeclContext()->isRecord()) { // We need to instantiate the definition of a static data member template, // and all we have is the in-class declaration of it. Instantiate a separate // declaration of the definition. TemplateDeclInstantiator Instantiator(*this, Var->getDeclContext(), TemplateArgs); Var = cast_or_null(Instantiator.VisitVarTemplateSpecializationDecl( VarSpec->getSpecializedTemplate(), Def, VarSpec->getTemplateArgsInfo(), VarSpec->getTemplateArgs().asArray(), VarSpec)); if (Var) { llvm::PointerUnion PatternPtr = VarSpec->getSpecializedTemplateOrPartial(); if (VarTemplatePartialSpecializationDecl *Partial = PatternPtr.dyn_cast()) cast(Var)->setInstantiationOf( Partial, &VarSpec->getTemplateInstantiationArgs()); // Attach the initializer. InstantiateVariableInitializer(Var, Def, TemplateArgs); } } else // Complete the existing variable's definition with an appropriately // substituted type and initializer. Var = CompleteVarTemplateSpecializationDecl(VarSpec, Def, TemplateArgs); PreviousContext.pop(); if (Var) { PassToConsumerRAII.Var = Var; Var->setTemplateSpecializationKind(OldVar->getTemplateSpecializationKind(), OldVar->getPointOfInstantiation()); } // This variable may have local implicit instantiations that need to be // instantiated within this scope. LocalInstantiations.perform(); Local.Exit(); GlobalInstantiations.perform(); } void Sema::InstantiateMemInitializers(CXXConstructorDecl *New, const CXXConstructorDecl *Tmpl, const MultiLevelTemplateArgumentList &TemplateArgs) { SmallVector NewInits; bool AnyErrors = Tmpl->isInvalidDecl(); // Instantiate all the initializers. 
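// For example: for
//   template<typename... Bases> struct D : Bases... { D() : Bases()... {} };
// the single written pack-expansion initializer `Bases()...` is expanded
// below into one base initializer per element of Bases.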
for (const auto *Init : Tmpl->inits()) { // Only instantiate written initializers, let Sema re-construct implicit // ones. if (!Init->isWritten()) continue; SourceLocation EllipsisLoc; if (Init->isPackExpansion()) { // This is a pack expansion. We should expand it now. TypeLoc BaseTL = Init->getTypeSourceInfo()->getTypeLoc(); SmallVector Unexpanded; collectUnexpandedParameterPacks(BaseTL, Unexpanded); collectUnexpandedParameterPacks(Init->getInit(), Unexpanded); bool ShouldExpand = false; bool RetainExpansion = false; Optional NumExpansions; if (CheckParameterPacksForExpansion(Init->getEllipsisLoc(), BaseTL.getSourceRange(), Unexpanded, TemplateArgs, ShouldExpand, RetainExpansion, NumExpansions)) { AnyErrors = true; New->setInvalidDecl(); continue; } assert(ShouldExpand && "Partial instantiation of base initializer?"); // Loop over all of the arguments in the argument pack(s), for (unsigned I = 0; I != *NumExpansions; ++I) { Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(*this, I); // Instantiate the initializer. ExprResult TempInit = SubstInitializer(Init->getInit(), TemplateArgs, /*CXXDirectInit=*/true); if (TempInit.isInvalid()) { AnyErrors = true; break; } // Instantiate the base type. TypeSourceInfo *BaseTInfo = SubstType(Init->getTypeSourceInfo(), TemplateArgs, Init->getSourceLocation(), New->getDeclName()); if (!BaseTInfo) { AnyErrors = true; break; } // Build the initializer. MemInitResult NewInit = BuildBaseInitializer(BaseTInfo->getType(), BaseTInfo, TempInit.get(), New->getParent(), SourceLocation()); if (NewInit.isInvalid()) { AnyErrors = true; break; } NewInits.push_back(NewInit.get()); } continue; } // Instantiate the initializer. ExprResult TempInit = SubstInitializer(Init->getInit(), TemplateArgs, /*CXXDirectInit=*/true); if (TempInit.isInvalid()) { AnyErrors = true; continue; } MemInitResult NewInit; if (Init->isDelegatingInitializer() || Init->isBaseInitializer()) { TypeSourceInfo *TInfo = SubstType(Init->getTypeSourceInfo(), TemplateArgs, Init->getSourceLocation(), New->getDeclName()); if (!TInfo) { AnyErrors = true; New->setInvalidDecl(); continue; } if (Init->isBaseInitializer()) NewInit = BuildBaseInitializer(TInfo->getType(), TInfo, TempInit.get(), New->getParent(), EllipsisLoc); else NewInit = BuildDelegatingInitializer(TInfo, TempInit.get(), cast(CurContext->getParent())); } else if (Init->isMemberInitializer()) { FieldDecl *Member = cast_or_null(FindInstantiatedDecl( Init->getMemberLocation(), Init->getMember(), TemplateArgs)); if (!Member) { AnyErrors = true; New->setInvalidDecl(); continue; } NewInit = BuildMemberInitializer(Member, TempInit.get(), Init->getSourceLocation()); } else if (Init->isIndirectMemberInitializer()) { IndirectFieldDecl *IndirectMember = cast_or_null(FindInstantiatedDecl( Init->getMemberLocation(), Init->getIndirectMember(), TemplateArgs)); if (!IndirectMember) { AnyErrors = true; New->setInvalidDecl(); continue; } NewInit = BuildMemberInitializer(IndirectMember, TempInit.get(), Init->getSourceLocation()); } if (NewInit.isInvalid()) { AnyErrors = true; New->setInvalidDecl(); } else { NewInits.push_back(NewInit.get()); } } // Assign all the initializers to the new constructor. ActOnMemInitializers(New, /*FIXME: ColonLoc */ SourceLocation(), NewInits, AnyErrors); } // TODO: this could be templated if the various decl types used the // same method name. 
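// One possible shape for that (a sketch only; it assumes the caller passes
// in the per-decl-kind accessor, since the decl classes use different
// method names today):
//
//   template <typename DeclT, typename GetInstantiatedFn>
//   static bool isInstantiationOfImpl(DeclT *Pattern, DeclT *Instance,
//                                     GetInstantiatedFn GetInstantiated) {
//     Pattern = Pattern->getCanonicalDecl();
//     do {
//       Instance = Instance->getCanonicalDecl();
//       if (Pattern == Instance)
//         return true;
//       Instance = GetInstantiated(Instance);
//     } while (Instance);
//     return false;
//   }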
static bool isInstantiationOf(ClassTemplateDecl *Pattern, ClassTemplateDecl *Instance) { Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMemberTemplate(); } while (Instance); return false; } static bool isInstantiationOf(FunctionTemplateDecl *Pattern, FunctionTemplateDecl *Instance) { Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMemberTemplate(); } while (Instance); return false; } static bool isInstantiationOf(ClassTemplatePartialSpecializationDecl *Pattern, ClassTemplatePartialSpecializationDecl *Instance) { Pattern = cast(Pattern->getCanonicalDecl()); do { Instance = cast( Instance->getCanonicalDecl()); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMember(); } while (Instance); return false; } static bool isInstantiationOf(CXXRecordDecl *Pattern, CXXRecordDecl *Instance) { Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMemberClass(); } while (Instance); return false; } static bool isInstantiationOf(FunctionDecl *Pattern, FunctionDecl *Instance) { Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMemberFunction(); } while (Instance); return false; } static bool isInstantiationOf(EnumDecl *Pattern, EnumDecl *Instance) { Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromMemberEnum(); } while (Instance); return false; } static bool isInstantiationOf(UsingShadowDecl *Pattern, UsingShadowDecl *Instance, ASTContext &C) { return declaresSameEntity(C.getInstantiatedFromUsingShadowDecl(Instance), Pattern); } static bool isInstantiationOf(UsingDecl *Pattern, UsingDecl *Instance, ASTContext &C) { return declaresSameEntity(C.getInstantiatedFromUsingDecl(Instance), Pattern); } template static bool isInstantiationOfUnresolvedUsingDecl(T *Pattern, Decl *Other, ASTContext &Ctx) { // An unresolved using declaration can instantiate to an unresolved using // declaration, or to a using declaration or a using declaration pack. // // Multiple declarations can claim to be instantiated from an unresolved // using declaration if it's a pack expansion. We want the UsingPackDecl // in that case, not the individual UsingDecls within the pack. 
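// For example: in
//   template<typename... Ts> struct S : Ts... { using Ts::operator()...; };
// the pattern `using Ts::operator()...` instantiates to a UsingPackDecl
// whose expansions are the per-base UsingDecls; the pattern maps to the
// pack, not to any one expansion.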
bool OtherIsPackExpansion; NamedDecl *OtherFrom; if (auto *OtherUUD = dyn_cast(Other)) { OtherIsPackExpansion = OtherUUD->isPackExpansion(); OtherFrom = Ctx.getInstantiatedFromUsingDecl(OtherUUD); } else if (auto *OtherUPD = dyn_cast(Other)) { OtherIsPackExpansion = true; OtherFrom = OtherUPD->getInstantiatedFromUsingDecl(); } else if (auto *OtherUD = dyn_cast(Other)) { OtherIsPackExpansion = false; OtherFrom = Ctx.getInstantiatedFromUsingDecl(OtherUD); } else { return false; } return Pattern->isPackExpansion() == OtherIsPackExpansion && declaresSameEntity(OtherFrom, Pattern); } static bool isInstantiationOfStaticDataMember(VarDecl *Pattern, VarDecl *Instance) { assert(Instance->isStaticDataMember()); Pattern = Pattern->getCanonicalDecl(); do { Instance = Instance->getCanonicalDecl(); if (Pattern == Instance) return true; Instance = Instance->getInstantiatedFromStaticDataMember(); } while (Instance); return false; } // Other is the prospective instantiation // D is the prospective pattern static bool isInstantiationOf(ASTContext &Ctx, NamedDecl *D, Decl *Other) { if (auto *UUD = dyn_cast(D)) return isInstantiationOfUnresolvedUsingDecl(UUD, Other, Ctx); if (auto *UUD = dyn_cast(D)) return isInstantiationOfUnresolvedUsingDecl(UUD, Other, Ctx); if (D->getKind() != Other->getKind()) return false; if (auto *Record = dyn_cast(Other)) return isInstantiationOf(cast(D), Record); if (auto *Function = dyn_cast(Other)) return isInstantiationOf(cast(D), Function); if (auto *Enum = dyn_cast(Other)) return isInstantiationOf(cast(D), Enum); if (auto *Var = dyn_cast(Other)) if (Var->isStaticDataMember()) return isInstantiationOfStaticDataMember(cast(D), Var); if (auto *Temp = dyn_cast(Other)) return isInstantiationOf(cast(D), Temp); if (auto *Temp = dyn_cast(Other)) return isInstantiationOf(cast(D), Temp); if (auto *PartialSpec = dyn_cast(Other)) return isInstantiationOf(cast(D), PartialSpec); if (auto *Field = dyn_cast(Other)) { if (!Field->getDeclName()) { // This is an unnamed field. return declaresSameEntity(Ctx.getInstantiatedFromUnnamedFieldDecl(Field), cast(D)); } } if (auto *Using = dyn_cast(Other)) return isInstantiationOf(cast(D), Using, Ctx); if (auto *Shadow = dyn_cast(Other)) return isInstantiationOf(cast(D), Shadow, Ctx); return D->getDeclName() && D->getDeclName() == cast(Other)->getDeclName(); } template static NamedDecl *findInstantiationOf(ASTContext &Ctx, NamedDecl *D, ForwardIterator first, ForwardIterator last) { for (; first != last; ++first) if (isInstantiationOf(Ctx, D, *first)) return cast(*first); return nullptr; } /// Finds the instantiation of the given declaration context /// within the current instantiation. /// /// \returns NULL if there was an error DeclContext *Sema::FindInstantiatedContext(SourceLocation Loc, DeclContext* DC, const MultiLevelTemplateArgumentList &TemplateArgs) { if (NamedDecl *D = dyn_cast(DC)) { Decl* ID = FindInstantiatedDecl(Loc, D, TemplateArgs, true); return cast_or_null(ID); } else return DC; } /// Determine whether the given context is dependent on template parameters at /// level \p Level or below. /// /// Sometimes we only substitute an inner set of template arguments and leave /// the outer templates alone. In such cases, contexts dependent only on the /// outer levels are not effectively dependent. 
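///
/// For example: in
/// \code
///   template<typename T> struct Outer { template<typename U> void f(); };
/// \endcode
/// when substituting only U's level and retaining T's level as an outer
/// level (Level == 1), Outer<T> (template depth 1) is not considered
/// dependent here, while contexts at depth 2 still are.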
static bool isDependentContextAtLevel(DeclContext *DC, unsigned Level) { if (!DC->isDependentContext()) return false; if (!Level) return true; return cast<Decl>(DC)->getTemplateDepth() > Level; } /// Find the instantiation of the given declaration within the /// current instantiation. /// /// This routine is intended to be used when \p D is a declaration /// referenced from within a template, that needs to be mapped into the /// corresponding declaration within an instantiation. For example, /// given: /// /// \code /// template<typename T> /// struct X { /// enum Kind { /// KnownValue = sizeof(T) /// }; /// /// bool getKind() const { return KnownValue; } /// }; /// /// template struct X<int>; /// \endcode /// /// In the instantiation of X<int>::getKind(), we need to map the \p /// EnumConstantDecl for \p KnownValue (which refers to /// X<T>::Kind::KnownValue) to its instantiation (X<int>::Kind::KnownValue). /// \p FindInstantiatedDecl performs this mapping from within the instantiation /// of X<int>. NamedDecl *Sema::FindInstantiatedDecl(SourceLocation Loc, NamedDecl *D, const MultiLevelTemplateArgumentList &TemplateArgs, bool FindingInstantiatedContext) { DeclContext *ParentDC = D->getDeclContext(); // Determine whether our parent context depends on any of the template // arguments we're currently substituting. bool ParentDependsOnArgs = isDependentContextAtLevel( ParentDC, TemplateArgs.getNumRetainedOuterLevels()); // FIXME: Parameters of pointer to functions (y below) that are themselves // parameters (p below) can have their ParentDC set to the translation-unit // - thus we can not consistently check if the ParentDC of such a parameter // is Dependent or/and a FunctionOrMethod. // For e.g. this code, during Template argument deduction tries to // find an instantiated decl for (T y) when the ParentDC for y is // the translation unit. // e.g. template <typename T> void Foo(auto (*p)(T y) -> decltype(y())) {} // float baz(float(*)()) { return 0.0; } // Foo(baz); // The better fix here is perhaps to ensure that a ParmVarDecl, by the time // it gets here, always has a FunctionOrMethod as its ParentDC?? // For now: // - as long as we have a ParmVarDecl whose parent is non-dependent and // whose type is not instantiation dependent, do nothing to the decl // - otherwise find its instantiated decl. if (isa<ParmVarDecl>(D) && !ParentDependsOnArgs && !cast<ParmVarDecl>(D)->getType()->isInstantiationDependentType()) return D; if (isa<ParmVarDecl>(D) || isa<NonTypeTemplateParmDecl>(D) || isa<TemplateTypeParmDecl>(D) || isa<TemplateTemplateParmDecl>(D) || (ParentDependsOnArgs && (ParentDC->isFunctionOrMethod() || isa<OMPDeclareReductionDecl>(ParentDC) || isa<OMPDeclareMapperDecl>(ParentDC))) || - (isa<CXXRecordDecl>(D) && cast<CXXRecordDecl>(D)->isLambda())) { + (isa<CXXRecordDecl>(D) && cast<CXXRecordDecl>(D)->isLambda() && + cast<CXXRecordDecl>(D)->getTemplateDepth() > + TemplateArgs.getNumRetainedOuterLevels())) { // D is a local of some kind. Look into the map of local // declarations to their instantiations. if (CurrentInstantiationScope) { if (auto Found = CurrentInstantiationScope->findInstantiationOf(D)) { if (Decl *FD = Found->dyn_cast<Decl *>()) return cast<NamedDecl>(FD); int PackIdx = ArgumentPackSubstitutionIndex; assert(PackIdx != -1 && "found declaration pack but not pack expanding"); typedef LocalInstantiationScope::DeclArgumentPack DeclArgumentPack; return cast<NamedDecl>((*Found->get<DeclArgumentPack *>())[PackIdx]); } } // If we're performing a partial substitution during template argument // deduction, we may not have values for template parameters yet. They // just map to themselves.
if (isa(D) || isa(D) || isa(D)) return D; if (D->isInvalidDecl()) return nullptr; // Normally this function only searches for already instantiated declaration // however we have to make an exclusion for local types used before // definition as in the code: // // template void f1() { // void g1(struct x1); // struct x1 {}; // } // // In this case instantiation of the type of 'g1' requires definition of // 'x1', which is defined later. Error recovery may produce an enum used // before definition. In these cases we need to instantiate relevant // declarations here. bool NeedInstantiate = false; if (CXXRecordDecl *RD = dyn_cast(D)) NeedInstantiate = RD->isLocalClass(); else if (isa(D) && isa(D->getDeclContext())) NeedInstantiate = true; else NeedInstantiate = isa(D); if (NeedInstantiate) { Decl *Inst = SubstDecl(D, CurContext, TemplateArgs); CurrentInstantiationScope->InstantiatedLocal(D, Inst); return cast(Inst); } // If we didn't find the decl, then we must have a label decl that hasn't // been found yet. Lazily instantiate it and return it now. assert(isa(D)); Decl *Inst = SubstDecl(D, CurContext, TemplateArgs); assert(Inst && "Failed to instantiate label??"); CurrentInstantiationScope->InstantiatedLocal(D, Inst); return cast(Inst); } if (CXXRecordDecl *Record = dyn_cast(D)) { if (!Record->isDependentContext()) return D; // Determine whether this record is the "templated" declaration describing // a class template or class template partial specialization. ClassTemplateDecl *ClassTemplate = Record->getDescribedClassTemplate(); if (ClassTemplate) ClassTemplate = ClassTemplate->getCanonicalDecl(); else if (ClassTemplatePartialSpecializationDecl *PartialSpec = dyn_cast(Record)) ClassTemplate = PartialSpec->getSpecializedTemplate()->getCanonicalDecl(); // Walk the current context to find either the record or an instantiation of // it. DeclContext *DC = CurContext; while (!DC->isFileContext()) { // If we're performing substitution while we're inside the template // definition, we'll find our own context. We're done. if (DC->Equals(Record)) return Record; if (CXXRecordDecl *InstRecord = dyn_cast(DC)) { // Check whether we're in the process of instantiating a class template // specialization of the template we're mapping. if (ClassTemplateSpecializationDecl *InstSpec = dyn_cast(InstRecord)){ ClassTemplateDecl *SpecTemplate = InstSpec->getSpecializedTemplate(); if (ClassTemplate && isInstantiationOf(ClassTemplate, SpecTemplate)) return InstRecord; } // Check whether we're in the process of instantiating a member class. if (isInstantiationOf(Record, InstRecord)) return InstRecord; } // Move to the outer template scope. if (FunctionDecl *FD = dyn_cast(DC)) { if (FD->getFriendObjectKind() && FD->getDeclContext()->isFileContext()){ DC = FD->getLexicalDeclContext(); continue; } // An implicit deduction guide acts as if it's within the class template // specialization described by its name and first N template params. auto *Guide = dyn_cast(FD); if (Guide && Guide->isImplicit()) { TemplateDecl *TD = Guide->getDeducedTemplate(); // Convert the arguments to an "as-written" list. 
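// (Illustrative: for `template<typename T> struct Y { Y(T); };`, the
// implicit guide `template<typename T> Y(T) -> Y<T>;` acts as if declared
// within Y<T>, so the template-id Y<args> built from this list is used to
// map names from Y's definition into the right specialization.)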
TemplateArgumentListInfo Args(Loc, Loc); for (TemplateArgument Arg : TemplateArgs.getInnermost().take_front( TD->getTemplateParameters()->size())) { ArrayRef Unpacked(Arg); if (Arg.getKind() == TemplateArgument::Pack) Unpacked = Arg.pack_elements(); for (TemplateArgument UnpackedArg : Unpacked) Args.addArgument( getTrivialTemplateArgumentLoc(UnpackedArg, QualType(), Loc)); } QualType T = CheckTemplateIdType(TemplateName(TD), Loc, Args); if (T.isNull()) return nullptr; auto *SubstRecord = T->getAsCXXRecordDecl(); assert(SubstRecord && "class template id not a class type?"); // Check that this template-id names the primary template and not a // partial or explicit specialization. (In the latter cases, it's // meaningless to attempt to find an instantiation of D within the // specialization.) // FIXME: The standard doesn't say what should happen here. if (FindingInstantiatedContext && usesPartialOrExplicitSpecialization( Loc, cast(SubstRecord))) { Diag(Loc, diag::err_specialization_not_primary_template) << T << (SubstRecord->getTemplateSpecializationKind() == TSK_ExplicitSpecialization); return nullptr; } DC = SubstRecord; continue; } } DC = DC->getParent(); } // Fall through to deal with other dependent record types (e.g., // anonymous unions in class templates). } if (!ParentDependsOnArgs) return D; ParentDC = FindInstantiatedContext(Loc, ParentDC, TemplateArgs); if (!ParentDC) return nullptr; if (ParentDC != D->getDeclContext()) { // We performed some kind of instantiation in the parent context, // so now we need to look into the instantiated parent context to // find the instantiation of the declaration D. // If our context used to be dependent, we may need to instantiate // it before performing lookup into that context. bool IsBeingInstantiated = false; if (CXXRecordDecl *Spec = dyn_cast(ParentDC)) { if (!Spec->isDependentContext()) { QualType T = Context.getTypeDeclType(Spec); const RecordType *Tag = T->getAs(); assert(Tag && "type of non-dependent record is not a RecordType"); if (Tag->isBeingDefined()) IsBeingInstantiated = true; if (!Tag->isBeingDefined() && RequireCompleteType(Loc, T, diag::err_incomplete_type)) return nullptr; ParentDC = Tag->getDecl(); } } NamedDecl *Result = nullptr; // FIXME: If the name is a dependent name, this lookup won't necessarily // find it. Does that ever matter? if (auto Name = D->getDeclName()) { DeclarationNameInfo NameInfo(Name, D->getLocation()); DeclarationNameInfo NewNameInfo = SubstDeclarationNameInfo(NameInfo, TemplateArgs); Name = NewNameInfo.getName(); if (!Name) return nullptr; DeclContext::lookup_result Found = ParentDC->lookup(Name); Result = findInstantiationOf(Context, D, Found.begin(), Found.end()); } else { // Since we don't have a name for the entity we're looking for, // our only option is to walk through all of the declarations to // find that name. This will occur in a few cases: // // - anonymous struct/union within a template // - unnamed class/struct/union/enum within a template // // FIXME: Find a better way to find these instantiations! Result = findInstantiationOf(Context, D, ParentDC->decls_begin(), ParentDC->decls_end()); } if (!Result) { if (isa(D)) { // UsingShadowDecls can instantiate to nothing because of using hiding. } else if (hasUncompilableErrorOccurred()) { // We've already complained about some ill-formed code, so most likely // this declaration failed to instantiate. There's no point in // complaining further, since this is normal in invalid code. // FIXME: Use more fine-grained 'invalid' tracking for this. 
      } else if (IsBeingInstantiated) {
        // The class in which this member exists is currently being
        // instantiated, and we haven't gotten around to instantiating this
        // member yet. This can happen when the code uses forward declarations
        // of member classes, and introduces ordering dependencies via
        // template instantiation.
        Diag(Loc, diag::err_member_not_yet_instantiated)
            << D->getDeclName()
            << Context.getTypeDeclType(cast<CXXRecordDecl>(ParentDC));
        Diag(D->getLocation(), diag::note_non_instantiated_member_here);
      } else if (EnumConstantDecl *ED = dyn_cast<EnumConstantDecl>(D)) {
        // This enumeration constant was found when the template was defined,
        // but can't be found in the instantiation. This can happen if an
        // unscoped enumeration member is explicitly specialized.
        EnumDecl *Enum = cast<EnumDecl>(ED->getLexicalDeclContext());
        EnumDecl *Spec =
            cast<EnumDecl>(FindInstantiatedDecl(Loc, Enum, TemplateArgs));
        assert(Spec->getTemplateSpecializationKind() ==
               TSK_ExplicitSpecialization);
        Diag(Loc, diag::err_enumerator_does_not_exist)
            << D->getDeclName()
            << Context.getTypeDeclType(cast<TagDecl>(Spec->getDeclContext()));
        Diag(Spec->getLocation(), diag::note_enum_specialized_here)
            << Context.getTypeDeclType(Spec);
      } else {
        // We should have found something, but didn't.
        llvm_unreachable("Unable to find instantiation of declaration!");
      }
    }

    D = Result;
  }

  return D;
}

/// Performs template instantiation for all implicit template
/// instantiations we have seen until this point.
void Sema::PerformPendingInstantiations(bool LocalOnly) {
  std::deque<PendingImplicitInstantiation> delayedPCHInstantiations;
  while (!PendingLocalImplicitInstantiations.empty() ||
         (!LocalOnly && !PendingInstantiations.empty())) {
    PendingImplicitInstantiation Inst;

    if (PendingLocalImplicitInstantiations.empty()) {
      Inst = PendingInstantiations.front();
      PendingInstantiations.pop_front();
    } else {
      Inst = PendingLocalImplicitInstantiations.front();
      PendingLocalImplicitInstantiations.pop_front();
    }

    // Instantiate function definitions
    if (FunctionDecl *Function = dyn_cast<FunctionDecl>(Inst.first)) {
      bool DefinitionRequired = Function->getTemplateSpecializationKind() ==
                                TSK_ExplicitInstantiationDefinition;
      if (Function->isMultiVersion()) {
        getASTContext().forEachMultiversionedFunctionVersion(
            Function, [this, Inst, DefinitionRequired](FunctionDecl *CurFD) {
              InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, CurFD, true,
                                            DefinitionRequired, true);
              if (CurFD->isDefined())
                CurFD->setInstantiationIsPending(false);
            });
      } else {
        InstantiateFunctionDefinition(/*FIXME:*/ Inst.second, Function, true,
                                      DefinitionRequired, true);
        if (Function->isDefined())
          Function->setInstantiationIsPending(false);
      }
      // Definition of a PCH-ed template declaration may be available only in
      // the TU.
      if (!LocalOnly && LangOpts.PCHInstantiateTemplates &&
          TUKind == TU_Prefix && Function->instantiationIsPending())
        delayedPCHInstantiations.push_back(Inst);
      continue;
    }

    // Instantiate variable definitions
    VarDecl *Var = cast<VarDecl>(Inst.first);

    assert((Var->isStaticDataMember() ||
            isa<VarTemplateSpecializationDecl>(Var)) &&
           "Not a static data member, nor a variable template"
           " specialization?");

    // Don't try to instantiate declarations if the most recent redeclaration
    // is invalid.
    if (Var->getMostRecentDecl()->isInvalidDecl())
      continue;

    // Check if the most recent declaration has changed the specialization kind
    // and removed the need for implicit instantiation.
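    // Illustration (not part of this patch): given
    //   template <typename T> T var = T();
    //   int f() { return var<int>; }   // queues an implicit instantiation
    // a later
    //   extern template int var<int>;  // TSK_ExplicitInstantiationDeclaration
    // promises the definition elsewhere, so the pending instantiation is
    // skipped by the switch below.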
    switch (Var->getMostRecentDecl()
                ->getTemplateSpecializationKindForInstantiation()) {
    case TSK_Undeclared:
      llvm_unreachable("Cannot instantiate an undeclared specialization.");
    case TSK_ExplicitInstantiationDeclaration:
    case TSK_ExplicitSpecialization:
      continue; // No longer need to instantiate this type.
    case TSK_ExplicitInstantiationDefinition:
      // We only need an instantiation if the pending instantiation *is* the
      // explicit instantiation.
      if (Var != Var->getMostRecentDecl())
        continue;
      break;
    case TSK_ImplicitInstantiation:
      break;
    }

    PrettyDeclStackTraceEntry CrashInfo(Context, Var, SourceLocation(),
                                        "instantiating variable definition");
    bool DefinitionRequired = Var->getTemplateSpecializationKind() ==
                              TSK_ExplicitInstantiationDefinition;

    // Instantiate static data member definitions or variable template
    // specializations.
    InstantiateVariableDefinition(/*FIXME:*/ Inst.second, Var, true,
                                  DefinitionRequired, true);
  }

  if (!LocalOnly && LangOpts.PCHInstantiateTemplates)
    PendingInstantiations.swap(delayedPCHInstantiations);
}

void Sema::PerformDependentDiagnostics(
    const DeclContext *Pattern,
    const MultiLevelTemplateArgumentList &TemplateArgs) {
  for (auto DD : Pattern->ddiags()) {
    switch (DD->getKind()) {
    case DependentDiagnostic::Access:
      HandleDependentAccessCheck(*DD, TemplateArgs);
      break;
    }
  }
}
diff --git a/libcxx/include/span b/libcxx/include/span
index fd95ecca17f7..b8dbc7e01fd6 100644
--- a/libcxx/include/span
+++ b/libcxx/include/span
@@ -1,592 +1,636 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//

#ifndef _LIBCPP_SPAN
#define _LIBCPP_SPAN

/*
    span synopsis

namespace std {

// constants
inline constexpr size_t dynamic_extent = numeric_limits<size_t>::max();

// [views.span], class template span
template <class ElementType, size_t Extent = dynamic_extent>
    class span;

template <class ElementType, size_t Extent>
    inline constexpr bool ranges::enable_view<span<ElementType, Extent>> = true;

template <class ElementType, size_t Extent>
    inline constexpr bool ranges::enable_borrowed_range<span<ElementType, Extent>> = true;

// [span.objectrep], views of object representation
template <class ElementType, size_t Extent>
    span<const byte, ((Extent == dynamic_extent) ? dynamic_extent :
        (sizeof(ElementType) * Extent))> as_bytes(span<ElementType, Extent> s) noexcept;

template <class ElementType, size_t Extent>
    span<      byte, ((Extent == dynamic_extent) ?
dynamic_extent : (sizeof(ElementType) * Extent))> as_writable_bytes(span s) noexcept; template class span { public: // constants and types using element_type = ElementType; using value_type = remove_cv_t; using size_type = size_t; using difference_type = ptrdiff_t; using pointer = element_type*; using const_pointer = const element_type*; using reference = element_type&; using const_reference = const element_type&; using iterator = implementation-defined; using reverse_iterator = std::reverse_iterator; static constexpr size_type extent = Extent; // [span.cons], span constructors, copy, assignment, and destructor constexpr span() noexcept; template constexpr explicit(Extent != dynamic_extent) span(It first, size_type count); template constexpr explicit(Extent != dynamic_extent) span(It first, End last); template constexpr span(type_identity_t (&arr)[N]) noexcept; template constexpr span(array& arr) noexcept; template constexpr span(const array& arr) noexcept; template constexpr explicit(Extent != dynamic_extent) span(R&& r); constexpr span(const span& other) noexcept = default; template constexpr explicit(Extent != dynamic_extent) span(const span& s) noexcept; ~span() noexcept = default; constexpr span& operator=(const span& other) noexcept = default; // [span.sub], span subviews template constexpr span first() const; template constexpr span last() const; template constexpr span subspan() const; constexpr span first(size_type count) const; constexpr span last(size_type count) const; constexpr span subspan(size_type offset, size_type count = dynamic_extent) const; // [span.obs], span observers constexpr size_type size() const noexcept; constexpr size_type size_bytes() const noexcept; [[nodiscard]] constexpr bool empty() const noexcept; // [span.elem], span element access constexpr reference operator[](size_type idx) const; constexpr reference front() const; constexpr reference back() const; constexpr pointer data() const noexcept; // [span.iterators], span iterator support constexpr iterator begin() const noexcept; constexpr iterator end() const noexcept; constexpr reverse_iterator rbegin() const noexcept; constexpr reverse_iterator rend() const noexcept; private: pointer data_; // exposition only size_type size_; // exposition only }; template span(It, EndOrSize) -> span>>; template span(T (&)[N]) -> span; template span(array&) -> span; template span(const array&) -> span; template span(R&&) -> span>>; } // namespace std */ #include <__config> #include <__debug> #include <__iterator/concepts.h> #include <__iterator/wrap_iter.h> #include <__ranges/concepts.h> #include <__ranges/data.h> #include <__ranges/enable_borrowed_range.h> #include <__ranges/enable_view.h> #include <__ranges/size.h> #include // for array #include // for byte #include // for iterators #include #include // for remove_cv, etc #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif _LIBCPP_PUSH_MACROS #include <__undef_macros> _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER > 17 inline constexpr size_t dynamic_extent = numeric_limits::max(); template class span; template struct __is_std_array : false_type {}; template struct __is_std_array> : true_type {}; template struct __is_std_span : false_type {}; template struct __is_std_span> : true_type {}; -#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +// This is a temporary workaround until we ship -- we've unfortunately been 
+// shipping before its API was finalized, and we used to provide a constructor +// from container types that had the requirements below. To avoid breaking code that +// has started relying on the range-based constructor until we ship all of , +// we emulate the constructor requirements like this. +template +struct __span_compatible_range : false_type { }; + +template +struct __span_compatible_range<_Range, _ElementType, void_t< + enable_if_t>::value>, + enable_if_t>::value>, + enable_if_t>>, + decltype(data(declval<_Range>())), + decltype(size(declval<_Range>())), + enable_if_t()))>(*)[], _ElementType(*)[]>> +>> : true_type { }; +#else template concept __span_compatible_range = ranges::contiguous_range<_Range> && ranges::sized_range<_Range> && (ranges::borrowed_range<_Range> || is_const_v<_ElementType>) && !__is_std_span>::value && !__is_std_array>::value && !is_array_v> && is_convertible_v>(*)[], _ElementType(*)[]>; #endif template class _LIBCPP_TEMPLATE_VIS span { public: // constants and types using element_type = _Tp; using value_type = remove_cv_t<_Tp>; using size_type = size_t; using difference_type = ptrdiff_t; using pointer = _Tp *; using const_pointer = const _Tp *; using reference = _Tp &; using const_reference = const _Tp &; #if (_LIBCPP_DEBUG_LEVEL == 2) || defined(_LIBCPP_ABI_SPAN_POINTER_ITERATORS) using iterator = pointer; #else using iterator = __wrap_iter; #endif using reverse_iterator = _VSTD::reverse_iterator; static constexpr size_type extent = _Extent; // [span.cons], span constructors, copy, assignment, and destructor template = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span() noexcept : __data{nullptr} {} constexpr span (const span&) noexcept = default; constexpr span& operator=(const span&) noexcept = default; #if !defined(_LIBCPP_HAS_NO_CONCEPTS) template && is_convertible_v>(*)[], element_type (*)[]>, nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr explicit span(_It __first, size_type __count) : __data{_VSTD::to_address(__first)} { (void)__count; _LIBCPP_ASSERT(_Extent == __count, "size mismatch in span's constructor (iterator, len)"); } template < class _It, class _End, enable_if_t > (*)[], element_type (*)[]> && contiguous_iterator<_It> && sized_sentinel_for<_End, _It> && !is_convertible_v<_End, size_t>, nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr explicit span(_It __first, _End __last) : __data{_VSTD::to_address(__first)} { (void)__last; _LIBCPP_ASSERT((__last - __first >= 0), "invalid range in span's constructor (iterator, sentinel)"); _LIBCPP_ASSERT(__last - __first == _Extent, "invalid range in span's constructor (iterator, sentinel): last - first != extent"); } #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) _LIBCPP_INLINE_VISIBILITY constexpr span(type_identity_t (&__arr)[_Extent]) noexcept : __data{__arr} {} template , nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(array<_OtherElementType, _Extent>& __arr) noexcept : __data{__arr.data()} {} template , nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(const array<_OtherElementType, _Extent>& __arr) noexcept : __data{__arr.data()} {} -#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + template ::value + >> + _LIBCPP_INLINE_VISIBILITY + constexpr explicit span(_Container& __c) : __data{std::data(__c)} { + _LIBCPP_ASSERT(std::size(__c) == _Extent, "size mismatch in span's constructor (range)"); + } + template ::value + >> + 
_LIBCPP_INLINE_VISIBILITY + constexpr explicit span(const _Container& __c) : __data{std::data(__c)} { + _LIBCPP_ASSERT(std::size(__c) == _Extent, "size mismatch in span's constructor (range)"); + } +#else template <__span_compatible_range _Range> _LIBCPP_INLINE_VISIBILITY constexpr explicit span(_Range&& __r) : __data{ranges::data(__r)} { _LIBCPP_ASSERT(ranges::size(__r) == _Extent, "size mismatch in span's constructor (range)"); } #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) template _LIBCPP_INLINE_VISIBILITY constexpr span(const span<_OtherElementType, _Extent>& __other, enable_if_t< is_convertible_v<_OtherElementType(*)[], element_type (*)[]>, nullptr_t> = nullptr) : __data{__other.data()} {} template _LIBCPP_INLINE_VISIBILITY constexpr explicit span(const span<_OtherElementType, dynamic_extent>& __other, enable_if_t< is_convertible_v<_OtherElementType(*)[], element_type (*)[]>, nullptr_t> = nullptr) noexcept : __data{__other.data()} { _LIBCPP_ASSERT(_Extent == __other.size(), "size mismatch in span's constructor (other span)"); } // ~span() noexcept = default; template _LIBCPP_INLINE_VISIBILITY constexpr span first() const noexcept { static_assert(_Count <= _Extent, "Count out of range in span::first()"); return span{data(), _Count}; } template _LIBCPP_INLINE_VISIBILITY constexpr span last() const noexcept { static_assert(_Count <= _Extent, "Count out of range in span::last()"); return span{data() + size() - _Count, _Count}; } _LIBCPP_INLINE_VISIBILITY constexpr span first(size_type __count) const noexcept { _LIBCPP_ASSERT(__count <= size(), "Count out of range in span::first(count)"); return {data(), __count}; } _LIBCPP_INLINE_VISIBILITY constexpr span last(size_type __count) const noexcept { _LIBCPP_ASSERT(__count <= size(), "Count out of range in span::last(count)"); return {data() + size() - __count, __count}; } template _LIBCPP_INLINE_VISIBILITY constexpr auto subspan() const noexcept -> span { static_assert(_Offset <= _Extent, "Offset out of range in span::subspan()"); static_assert(_Count == dynamic_extent || _Count <= _Extent - _Offset, "Offset + count out of range in span::subspan()"); using _ReturnType = span; return _ReturnType{data() + _Offset, _Count == dynamic_extent ? 
size() - _Offset : _Count}; } _LIBCPP_INLINE_VISIBILITY constexpr span subspan(size_type __offset, size_type __count = dynamic_extent) const noexcept { _LIBCPP_ASSERT(__offset <= size(), "Offset out of range in span::subspan(offset, count)"); _LIBCPP_ASSERT(__count <= size() || __count == dynamic_extent, "Count out of range in span::subspan(offset, count)"); if (__count == dynamic_extent) return {data() + __offset, size() - __offset}; _LIBCPP_ASSERT(__count <= size() - __offset, "Offset + count out of range in span::subspan(offset, count)"); return {data() + __offset, __count}; } _LIBCPP_INLINE_VISIBILITY constexpr size_type size() const noexcept { return _Extent; } _LIBCPP_INLINE_VISIBILITY constexpr size_type size_bytes() const noexcept { return _Extent * sizeof(element_type); } [[nodiscard]] _LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return _Extent == 0; } _LIBCPP_INLINE_VISIBILITY constexpr reference operator[](size_type __idx) const noexcept { _LIBCPP_ASSERT(__idx < size(), "span[] index out of bounds"); return __data[__idx]; } _LIBCPP_INLINE_VISIBILITY constexpr reference front() const noexcept { _LIBCPP_ASSERT(!empty(), "span::front() on empty span"); return __data[0]; } _LIBCPP_INLINE_VISIBILITY constexpr reference back() const noexcept { _LIBCPP_ASSERT(!empty(), "span::back() on empty span"); return __data[size()-1]; } _LIBCPP_INLINE_VISIBILITY constexpr pointer data() const noexcept { return __data; } // [span.iter], span iterator support _LIBCPP_INLINE_VISIBILITY constexpr iterator begin() const noexcept { return iterator(data()); } _LIBCPP_INLINE_VISIBILITY constexpr iterator end() const noexcept { return iterator(data() + size()); } _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); } _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } _LIBCPP_INLINE_VISIBILITY span __as_bytes() const noexcept { return span{reinterpret_cast(data()), size_bytes()}; } _LIBCPP_INLINE_VISIBILITY span __as_writable_bytes() const noexcept { return span{reinterpret_cast(data()), size_bytes()}; } private: pointer __data; }; template class _LIBCPP_TEMPLATE_VIS span<_Tp, dynamic_extent> { private: public: // constants and types using element_type = _Tp; using value_type = remove_cv_t<_Tp>; using size_type = size_t; using difference_type = ptrdiff_t; using pointer = _Tp *; using const_pointer = const _Tp *; using reference = _Tp &; using const_reference = const _Tp &; #if (_LIBCPP_DEBUG_LEVEL == 2) || defined(_LIBCPP_ABI_SPAN_POINTER_ITERATORS) using iterator = pointer; #else using iterator = __wrap_iter; #endif using reverse_iterator = _VSTD::reverse_iterator; static constexpr size_type extent = dynamic_extent; // [span.cons], span constructors, copy, assignment, and destructor _LIBCPP_INLINE_VISIBILITY constexpr span() noexcept : __data{nullptr}, __size{0} {} constexpr span (const span&) noexcept = default; constexpr span& operator=(const span&) noexcept = default; #if !defined(_LIBCPP_HAS_NO_CONCEPTS) template && is_convertible_v > (*)[], element_type (*)[]>, nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(_It __first, size_type __count) : __data{_VSTD::to_address(__first)}, __size{__count} {} template < class _It, class _End, enable_if_t > (*)[], element_type (*)[]> && contiguous_iterator<_It> && sized_sentinel_for<_End, _It> && !is_convertible_v<_End, size_t>, nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(_It __first, _End 
__last) : __data(_VSTD::to_address(__first)), __size(__last - __first) {} #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) template _LIBCPP_INLINE_VISIBILITY constexpr span(type_identity_t (&__arr)[_Sz]) noexcept : __data{__arr}, __size{_Sz} {} template , nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(array<_OtherElementType, _Sz>& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {} template , nullptr_t> = nullptr> _LIBCPP_INLINE_VISIBILITY constexpr span(const array<_OtherElementType, _Sz>& __arr) noexcept : __data{__arr.data()}, __size{_Sz} {} -#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) +#if defined(_LIBCPP_HAS_NO_CONCEPTS) || defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + template ::value + >> + _LIBCPP_INLINE_VISIBILITY + constexpr span(_Container& __c) : __data(std::data(__c)), __size{std::size(__c)} {} + template ::value + >> + _LIBCPP_INLINE_VISIBILITY + constexpr span(const _Container& __c) : __data(std::data(__c)), __size{std::size(__c)} {} +#else template <__span_compatible_range _Range> _LIBCPP_INLINE_VISIBILITY constexpr span(_Range&& __r) : __data(ranges::data(__r)), __size{ranges::size(__r)} {} #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) template _LIBCPP_INLINE_VISIBILITY constexpr span(const span<_OtherElementType, _OtherExtent>& __other, enable_if_t< is_convertible_v<_OtherElementType(*)[], element_type (*)[]>, nullptr_t> = nullptr) noexcept : __data{__other.data()}, __size{__other.size()} {} // ~span() noexcept = default; template _LIBCPP_INLINE_VISIBILITY constexpr span first() const noexcept { _LIBCPP_ASSERT(_Count <= size(), "Count out of range in span::first()"); return span{data(), _Count}; } template _LIBCPP_INLINE_VISIBILITY constexpr span last() const noexcept { _LIBCPP_ASSERT(_Count <= size(), "Count out of range in span::last()"); return span{data() + size() - _Count, _Count}; } _LIBCPP_INLINE_VISIBILITY constexpr span first(size_type __count) const noexcept { _LIBCPP_ASSERT(__count <= size(), "Count out of range in span::first(count)"); return {data(), __count}; } _LIBCPP_INLINE_VISIBILITY constexpr span last (size_type __count) const noexcept { _LIBCPP_ASSERT(__count <= size(), "Count out of range in span::last(count)"); return {data() + size() - __count, __count}; } template _LIBCPP_INLINE_VISIBILITY constexpr span subspan() const noexcept { _LIBCPP_ASSERT(_Offset <= size(), "Offset out of range in span::subspan()"); _LIBCPP_ASSERT(_Count == dynamic_extent || _Count <= size() - _Offset, "Offset + count out of range in span::subspan()"); return span{data() + _Offset, _Count == dynamic_extent ? 
size() - _Offset : _Count}; } constexpr span _LIBCPP_INLINE_VISIBILITY subspan(size_type __offset, size_type __count = dynamic_extent) const noexcept { _LIBCPP_ASSERT(__offset <= size(), "Offset out of range in span::subspan(offset, count)"); _LIBCPP_ASSERT(__count <= size() || __count == dynamic_extent, "count out of range in span::subspan(offset, count)"); if (__count == dynamic_extent) return {data() + __offset, size() - __offset}; _LIBCPP_ASSERT(__count <= size() - __offset, "Offset + count out of range in span::subspan(offset, count)"); return {data() + __offset, __count}; } _LIBCPP_INLINE_VISIBILITY constexpr size_type size() const noexcept { return __size; } _LIBCPP_INLINE_VISIBILITY constexpr size_type size_bytes() const noexcept { return __size * sizeof(element_type); } [[nodiscard]] _LIBCPP_INLINE_VISIBILITY constexpr bool empty() const noexcept { return __size == 0; } _LIBCPP_INLINE_VISIBILITY constexpr reference operator[](size_type __idx) const noexcept { _LIBCPP_ASSERT(__idx < size(), "span[] index out of bounds"); return __data[__idx]; } _LIBCPP_INLINE_VISIBILITY constexpr reference front() const noexcept { _LIBCPP_ASSERT(!empty(), "span[].front() on empty span"); return __data[0]; } _LIBCPP_INLINE_VISIBILITY constexpr reference back() const noexcept { _LIBCPP_ASSERT(!empty(), "span[].back() on empty span"); return __data[size()-1]; } _LIBCPP_INLINE_VISIBILITY constexpr pointer data() const noexcept { return __data; } // [span.iter], span iterator support _LIBCPP_INLINE_VISIBILITY constexpr iterator begin() const noexcept { return iterator(data()); } _LIBCPP_INLINE_VISIBILITY constexpr iterator end() const noexcept { return iterator(data() + size()); } _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rbegin() const noexcept { return reverse_iterator(end()); } _LIBCPP_INLINE_VISIBILITY constexpr reverse_iterator rend() const noexcept { return reverse_iterator(begin()); } _LIBCPP_INLINE_VISIBILITY span __as_bytes() const noexcept { return {reinterpret_cast(data()), size_bytes()}; } _LIBCPP_INLINE_VISIBILITY span __as_writable_bytes() const noexcept { return {reinterpret_cast(data()), size_bytes()}; } private: pointer __data; size_type __size; }; #if !defined(_LIBCPP_HAS_NO_CONCEPTS) template inline constexpr bool ranges::enable_borrowed_range > = true; template inline constexpr bool ranges::enable_view> = true; #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) // as_bytes & as_writable_bytes template _LIBCPP_INLINE_VISIBILITY auto as_bytes(span<_Tp, _Extent> __s) noexcept -> decltype(__s.__as_bytes()) { return __s.__as_bytes(); } template _LIBCPP_INLINE_VISIBILITY auto as_writable_bytes(span<_Tp, _Extent> __s) noexcept -> enable_if_t, decltype(__s.__as_writable_bytes())> { return __s.__as_writable_bytes(); } #if !defined(_LIBCPP_HAS_NO_CONCEPTS) template span(_It, _EndOrSize) -> span>>; #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) template span(_Tp (&)[_Sz]) -> span<_Tp, _Sz>; template span(array<_Tp, _Sz>&) -> span<_Tp, _Sz>; template span(const array<_Tp, _Sz>&) -> span; #if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) template span(_Range&&) -> span>>; #endif #endif // _LIBCPP_STD_VER > 17 _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS #endif // _LIBCPP_SPAN diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 12db942f1db5..1ed2327ea630 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1,2098 +1,2104 @@ //===- Writer.cpp ---------------------------------------------------------===// // // Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Writer.h" #include "COFFLinkerContext.h" #include "CallGraphSort.h" #include "Config.h" #include "DLL.h" #include "InputFiles.h" #include "LLDMapFile.h" #include "MapFile.h" #include "PDB.h" #include "SymbolTable.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Timer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/xxhash.h" #include #include #include #include #include using namespace llvm; using namespace llvm::COFF; using namespace llvm::object; using namespace llvm::support; using namespace llvm::support::endian; using namespace lld; using namespace lld::coff; /* To re-generate DOSProgram: $ cat > /tmp/DOSProgram.asm org 0 ; Copy cs to ds. push cs pop ds ; Point ds:dx at the $-terminated string. mov dx, str ; Int 21/AH=09h: Write string to standard output. mov ah, 0x9 int 0x21 ; Int 21/AH=4Ch: Exit with return code (in AL). mov ax, 0x4C01 int 0x21 str: db 'This program cannot be run in DOS mode.$' align 8, db 0 $ nasm -fbin /tmp/DOSProgram.asm -o /tmp/DOSProgram.bin $ xxd -i /tmp/DOSProgram.bin */ static unsigned char dosProgram[] = { 0x0e, 0x1f, 0xba, 0x0e, 0x00, 0xb4, 0x09, 0xcd, 0x21, 0xb8, 0x01, 0x4c, 0xcd, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6f, 0x67, 0x72, 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6e, 0x6e, 0x6f, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x44, 0x4f, 0x53, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x2e, 0x24, 0x00, 0x00 }; static_assert(sizeof(dosProgram) % 8 == 0, "DOSProgram size must be multiple of 8"); static const int dosStubSize = sizeof(dos_header) + sizeof(dosProgram); static_assert(dosStubSize % 8 == 0, "DOSStub size must be multiple of 8"); static const int numberOfDataDirectory = 16; namespace { class DebugDirectoryChunk : public NonSectionChunk { public: DebugDirectoryChunk(COFFLinkerContext &c, const std::vector> &r, bool writeRepro) : records(r), writeRepro(writeRepro), ctx(c) {} size_t getSize() const override { return (records.size() + int(writeRepro)) * sizeof(debug_directory); } void writeTo(uint8_t *b) const override { auto *d = reinterpret_cast(b); for (const std::pair& record : records) { Chunk *c = record.second; OutputSection *os = ctx.getOutputSection(c); uint64_t offs = os->getFileOff() + (c->getRVA() - os->getRVA()); fillEntry(d, record.first, c->getSize(), c->getRVA(), offs); ++d; } if (writeRepro) { // FIXME: The COFF spec allows either a 0-sized entry to just say // "the timestamp field is really a hash", or a 4-byte size field // followed by that many bytes containing a longer hash (with the // lowest 4 bytes usually being the timestamp in little-endian order). // Consider storing the full 8 bytes computed by xxHash64 here. 
fillEntry(d, COFF::IMAGE_DEBUG_TYPE_REPRO, 0, 0, 0); } } void setTimeDateStamp(uint32_t timeDateStamp) { for (support::ulittle32_t *tds : timeDateStamps) *tds = timeDateStamp; } private: void fillEntry(debug_directory *d, COFF::DebugType debugType, size_t size, uint64_t rva, uint64_t offs) const { d->Characteristics = 0; d->TimeDateStamp = 0; d->MajorVersion = 0; d->MinorVersion = 0; d->Type = debugType; d->SizeOfData = size; d->AddressOfRawData = rva; d->PointerToRawData = offs; timeDateStamps.push_back(&d->TimeDateStamp); } mutable std::vector timeDateStamps; const std::vector> &records; bool writeRepro; COFFLinkerContext &ctx; }; class CVDebugRecordChunk : public NonSectionChunk { public: size_t getSize() const override { return sizeof(codeview::DebugInfo) + config->pdbAltPath.size() + 1; } void writeTo(uint8_t *b) const override { // Save off the DebugInfo entry to backfill the file signature (build id) // in Writer::writeBuildId buildId = reinterpret_cast(b); // variable sized field (PDB Path) char *p = reinterpret_cast(b + sizeof(*buildId)); if (!config->pdbAltPath.empty()) memcpy(p, config->pdbAltPath.data(), config->pdbAltPath.size()); p[config->pdbAltPath.size()] = '\0'; } mutable codeview::DebugInfo *buildId = nullptr; }; class ExtendedDllCharacteristicsChunk : public NonSectionChunk { public: ExtendedDllCharacteristicsChunk(uint32_t c) : characteristics(c) {} size_t getSize() const override { return 4; } void writeTo(uint8_t *buf) const override { write32le(buf, characteristics); } uint32_t characteristics = 0; }; // PartialSection represents a group of chunks that contribute to an // OutputSection. Collating a collection of PartialSections of same name and // characteristics constitutes the OutputSection. class PartialSectionKey { public: StringRef name; unsigned characteristics; bool operator<(const PartialSectionKey &other) const { int c = name.compare(other.name); if (c == 1) return false; if (c == 0) return characteristics < other.characteristics; return true; } }; // The writer writes a SymbolTable result to a file. 
class Writer {
public:
  Writer(COFFLinkerContext &c)
      : buffer(errorHandler().outputBuffer), ctx(c) {}
  void run();

private:
  void createSections();
  void createMiscChunks();
  void createImportTables();
  void appendImportThunks();
  void locateImportTables();
  void createExportTable();
  void mergeSections();
  void removeUnusedSections();
  void assignAddresses();
  void finalizeAddresses();
  void removeEmptySections();
  void assignOutputSectionIndices();
  void createSymbolAndStringTable();
  void openFile(StringRef outputPath);
  template <typename PEHeaderTy> void writeHeader();
  void createSEHTable();
  void createRuntimePseudoRelocs();
  void insertCtorDtorSymbols();
  void createGuardCFTables();
  void markSymbolsForRVATable(ObjFile *file,
                              ArrayRef<SectionChunk *> symIdxChunks,
                              SymbolRVASet &tableSymbols);
  void getSymbolsFromSections(ObjFile *file,
                              ArrayRef<SectionChunk *> symIdxChunks,
                              std::vector<Symbol *> &symbols);
  void maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym,
                        StringRef countSym, bool hasFlag = false);
  void setSectionPermissions();
  void writeSections();
  void writeBuildId();
  void sortSections();
  void sortExceptionTable();
  void sortCRTSectionChunks(std::vector<Chunk *> &chunks);
  void addSyntheticIdata();
  void fixPartialSectionChars(StringRef name, uint32_t chars);
  bool fixGnuImportChunks();
  void fixTlsAlignment();
  PartialSection *createPartialSection(StringRef name, uint32_t outChars);
  PartialSection *findPartialSection(StringRef name, uint32_t outChars);

  llvm::Optional<coff_symbol16> createSymbol(Defined *d);
  size_t addEntryToStringTable(StringRef str);

  OutputSection *findSection(StringRef name);
  void addBaserels();
  void addBaserelBlocks(std::vector<Baserel> &v);

  uint32_t getSizeOfInitializedData();

  std::unique_ptr<FileOutputBuffer> &buffer;
  std::map<PartialSectionKey, PartialSection *> partialSections;
  std::vector<char> strtab;
  std::vector<llvm::object::coff_symbol16> outputSymtab;
  IdataContents idata;
  Chunk *importTableStart = nullptr;
  uint64_t importTableSize = 0;
  Chunk *edataStart = nullptr;
  Chunk *edataEnd = nullptr;
  Chunk *iatStart = nullptr;
  uint64_t iatSize = 0;
  DelayLoadContents delayIdata;
  EdataContents edata;
  bool setNoSEHCharacteristic = false;
  uint32_t tlsAlignment = 0;

  DebugDirectoryChunk *debugDirectory = nullptr;
  std::vector<std::pair<COFF::DebugType, Chunk *>> debugRecords;
  CVDebugRecordChunk *buildId = nullptr;
  ArrayRef<coff_section> sectionTable;

  uint64_t fileSize;
  uint32_t pointerToSymbolTable = 0;
  uint64_t sizeOfImage;
  uint64_t sizeOfHeaders;

  OutputSection *textSec;
  OutputSection *rdataSec;
  OutputSection *buildidSec;
  OutputSection *dataSec;
  OutputSection *pdataSec;
  OutputSection *idataSec;
  OutputSection *edataSec;
  OutputSection *didatSec;
  OutputSection *rsrcSec;
  OutputSection *relocSec;
  OutputSection *ctorsSec;
  OutputSection *dtorsSec;

  // The first and last .pdata sections in the output file.
  //
  // We need to keep track of the location of .pdata in whichever section it
  // gets merged into so that we can sort its contents and emit a correct data
  // directory entry for the exception table. This is also the case for some
  // other sections (such as .edata) but because the contents of those sections
  // are entirely linker-generated we can keep track of their locations using
  // the chunks that the linker creates. All .pdata chunks come from input
  // files, so we need to keep track of them separately.
  Chunk *firstPdata = nullptr;
  Chunk *lastPdata;

  COFFLinkerContext &ctx;
};
} // anonymous namespace
void lld::coff::writeResult(COFFLinkerContext &ctx) { Writer(ctx).run(); }

void OutputSection::addChunk(Chunk *c) { chunks.push_back(c); }

void OutputSection::insertChunkAtStart(Chunk *c) {
  chunks.insert(chunks.begin(), c);
}

void OutputSection::setPermissions(uint32_t c) {
  header.Characteristics &= ~permMask;
  header.Characteristics |= c;
}

void OutputSection::merge(OutputSection *other) {
  chunks.insert(chunks.end(), other->chunks.begin(), other->chunks.end());
  other->chunks.clear();
  contribSections.insert(contribSections.end(), other->contribSections.begin(),
                         other->contribSections.end());
  other->contribSections.clear();
}

// Write the section header to a given buffer.
void OutputSection::writeHeaderTo(uint8_t *buf) {
  auto *hdr = reinterpret_cast<coff_section *>(buf);
  *hdr = header;
  if (stringTableOff) {
    // If name is too long, write offset into the string table as a name.
    sprintf(hdr->Name, "/%d", stringTableOff);
  } else {
    assert(!config->debug || name.size() <= COFF::NameSize ||
           (hdr->Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0);
    strncpy(hdr->Name, name.data(),
            std::min(name.size(), (size_t)COFF::NameSize));
  }
}

void OutputSection::addContributingPartialSection(PartialSection *sec) {
  contribSections.push_back(sec);
}

// Check whether the target address S is in range from a relocation
// of type relType at address P.
static bool isInRange(uint16_t relType, uint64_t s, uint64_t p, int margin) {
  if (config->machine == ARMNT) {
    int64_t diff = AbsoluteDifference(s, p + 4) + margin;
    switch (relType) {
    case IMAGE_REL_ARM_BRANCH20T:
      return isInt<21>(diff);
    case IMAGE_REL_ARM_BRANCH24T:
    case IMAGE_REL_ARM_BLX23T:
      return isInt<25>(diff);
    default:
      return true;
    }
  } else if (config->machine == ARM64) {
    int64_t diff = AbsoluteDifference(s, p) + margin;
    switch (relType) {
    case IMAGE_REL_ARM64_BRANCH26:
      return isInt<28>(diff);
    case IMAGE_REL_ARM64_BRANCH19:
      return isInt<21>(diff);
    case IMAGE_REL_ARM64_BRANCH14:
      return isInt<16>(diff);
    default:
      return true;
    }
  } else {
    llvm_unreachable("Unexpected architecture");
  }
}

// Return the last thunk for the given target if it is in range,
// or create a new one.
static std::pair<Defined *, bool>
getThunk(DenseMap<uint64_t, Defined *> &lastThunks, Defined *target, uint64_t p,
         uint16_t type, int margin) {
  Defined *&lastThunk = lastThunks[target->getRVA()];
  if (lastThunk && isInRange(type, lastThunk->getRVA(), p, margin))
    return {lastThunk, false};
  Chunk *c;
  switch (config->machine) {
  case ARMNT:
    c = make<RangeExtensionThunkARM>(target);
    break;
  case ARM64:
    c = make<RangeExtensionThunkARM64>(target);
    break;
  default:
    llvm_unreachable("Unexpected architecture");
  }
  Defined *d = make<DefinedSynthetic>("", c);
  lastThunk = d;
  return {d, true};
}
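// Worked example (illustrative, not part of this patch): an ARM64
// IMAGE_REL_ARM64_BRANCH26 branch encodes a signed 28-bit byte offset, so a
// branch at RVA p reaches a target s only while |s - p| < 2^27 (128 MiB).
// isInRange() adds the margin to that difference, so with margin = 102400 a
// target must stay roughly 100 KiB inside the 128 MiB limit to count as
// reachable.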
// This checks all relocations, and for any relocation which isn't in range
// it adds a thunk after the section chunk that contains the relocation.
// If the latest thunk for the specific target is in range, that is used
// instead of creating a new thunk. All range checks are done with the
// specified margin, to make sure that relocations that originally are in
// range, but only barely, also get thunks - in case other added thunks makes
// the target go out of range.
//
// After adding thunks, we verify that all relocations are in range (with
// no extra margin requirements). If this failed, we restart (throwing away
// the previously created thunks) and retry with a wider margin.
static bool createThunks(OutputSection *os, int margin) {
  bool addressesChanged = false;
  DenseMap<uint64_t, Defined *> lastThunks;
  DenseMap<std::pair<ObjFile *, Defined *>, uint32_t> thunkSymtabIndices;
  size_t thunksSize = 0;
  // Recheck Chunks.size() each iteration, since we can insert more
  // elements into it.
  for (size_t i = 0; i != os->chunks.size(); ++i) {
    SectionChunk *sc = dyn_cast_or_null<SectionChunk>(os->chunks[i]);
    if (!sc)
      continue;
    size_t thunkInsertionSpot = i + 1;

    // Try to get a good enough estimate of where new thunks will be placed.
    // Offset this by the size of the new thunks added so far, to make the
    // estimate slightly better.
    size_t thunkInsertionRVA = sc->getRVA() + sc->getSize() + thunksSize;
    ObjFile *file = sc->file;
    std::vector<std::pair<size_t, uint32_t>> relocReplacements;
    ArrayRef<coff_relocation> originalRelocs =
        file->getCOFFObj()->getRelocations(sc->header);
    for (size_t j = 0, e = originalRelocs.size(); j < e; ++j) {
      const coff_relocation &rel = originalRelocs[j];
      Symbol *relocTarget = file->getSymbol(rel.SymbolTableIndex);

      // The estimate of the source address P should be pretty accurate,
      // but we don't know whether the target Symbol address should be
      // offset by thunksSize or not (or by some of thunksSize but not all of
      // it), giving us some uncertainty once we have added one thunk.
      uint64_t p = sc->getRVA() + rel.VirtualAddress + thunksSize;

      Defined *sym = dyn_cast_or_null<Defined>(relocTarget);
      if (!sym)
        continue;

      uint64_t s = sym->getRVA();

      if (isInRange(rel.Type, s, p, margin))
        continue;

      // If the target isn't in range, hook it up to an existing or new
      // thunk.
      Defined *thunk;
      bool wasNew;
      std::tie(thunk, wasNew) = getThunk(lastThunks, sym, p, rel.Type, margin);
      if (wasNew) {
        Chunk *thunkChunk = thunk->getChunk();
        thunkChunk->setRVA(
            thunkInsertionRVA); // Estimate of where it will be located.
        os->chunks.insert(os->chunks.begin() + thunkInsertionSpot, thunkChunk);
        thunkInsertionSpot++;
        thunksSize += thunkChunk->getSize();
        thunkInsertionRVA += thunkChunk->getSize();
        addressesChanged = true;
      }

      // To redirect the relocation, add a symbol to the parent object file's
      // symbol table, and replace the relocation symbol table index with the
      // new index.
      auto insertion = thunkSymtabIndices.insert({{file, thunk}, ~0U});
      uint32_t &thunkSymbolIndex = insertion.first->second;
      if (insertion.second)
        thunkSymbolIndex = file->addRangeThunkSymbol(thunk);
      relocReplacements.push_back({j, thunkSymbolIndex});
    }

    // Get a writable copy of this section's relocations so they can be
    // modified. If the relocations point into the object file, allocate new
    // memory. Otherwise, this must be previously allocated memory that can be
    // modified in place.
    ArrayRef<coff_relocation> curRelocs = sc->getRelocs();
    MutableArrayRef<coff_relocation> newRelocs;
    if (originalRelocs.data() == curRelocs.data()) {
      newRelocs = makeMutableArrayRef(
          bAlloc().Allocate<coff_relocation>(originalRelocs.size()),
          originalRelocs.size());
    } else {
      newRelocs = makeMutableArrayRef(
          const_cast<coff_relocation *>(curRelocs.data()), curRelocs.size());
    }

    // Copy each relocation, but replace the symbol table indices which need
    // thunks.
    auto nextReplacement = relocReplacements.begin();
    auto endReplacement = relocReplacements.end();
    for (size_t i = 0, e = originalRelocs.size(); i != e; ++i) {
      newRelocs[i] = originalRelocs[i];
      if (nextReplacement != endReplacement && nextReplacement->first == i) {
        newRelocs[i].SymbolTableIndex = nextReplacement->second;
        ++nextReplacement;
      }
    }

    sc->setRelocs(newRelocs);
  }
  return addressesChanged;
}
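// Note (illustrative, not part of this patch): thunkInsertionRVA above is
// only an estimate; final addresses are recomputed by assignAddresses()
// after every pass, and verifyRanges() below re-checks the result with
// margin 0 before the layout is accepted.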
// Verify that all relocations are in range, with no extra margin
// requirements.
static bool verifyRanges(const std::vector<Chunk *> chunks) {
  for (Chunk *c : chunks) {
    SectionChunk *sc = dyn_cast_or_null<SectionChunk>(c);
    if (!sc)
      continue;

    ArrayRef<coff_relocation> relocs = sc->getRelocs();
    for (size_t j = 0, e = relocs.size(); j < e; ++j) {
      const coff_relocation &rel = relocs[j];
      Symbol *relocTarget = sc->file->getSymbol(rel.SymbolTableIndex);

      Defined *sym = dyn_cast_or_null<Defined>(relocTarget);
      if (!sym)
        continue;

      uint64_t p = sc->getRVA() + rel.VirtualAddress;
      uint64_t s = sym->getRVA();

      if (!isInRange(rel.Type, s, p, 0))
        return false;
    }
  }
  return true;
}

// Assign addresses and add thunks if necessary.
void Writer::finalizeAddresses() {
  assignAddresses();
  if (config->machine != ARMNT && config->machine != ARM64)
    return;

  size_t origNumChunks = 0;
  for (OutputSection *sec : ctx.outputSections) {
    sec->origChunks = sec->chunks;
    origNumChunks += sec->chunks.size();
  }

  int pass = 0;
  int margin = 1024 * 100;
  while (true) {
    // First check whether we need thunks at all, or if the previous pass of
    // adding them turned out ok.
    bool rangesOk = true;
    size_t numChunks = 0;
    for (OutputSection *sec : ctx.outputSections) {
      if (!verifyRanges(sec->chunks)) {
        rangesOk = false;
        break;
      }
      numChunks += sec->chunks.size();
    }
    if (rangesOk) {
      if (pass > 0)
        log("Added " + Twine(numChunks - origNumChunks) + " thunks with " +
            "margin " + Twine(margin) + " in " + Twine(pass) + " passes");
      return;
    }

    if (pass >= 10)
      fatal("adding thunks hasn't converged after " + Twine(pass) + " passes");

    if (pass > 0) {
      // If the previous pass didn't work out, reset everything back to the
      // original conditions before retrying with a wider margin. This should
      // ideally never happen under real circumstances.
      for (OutputSection *sec : ctx.outputSections)
        sec->chunks = sec->origChunks;
      margin *= 2;
    }

    // Try adding thunks everywhere where it is needed, with a margin
    // to avoid things going out of range due to the added thunks.
    bool addressesChanged = false;
    for (OutputSection *sec : ctx.outputSections)
      addressesChanged |= createThunks(sec, margin);
    // If the verification above thought we needed thunks, we should have
    // added some.
    assert(addressesChanged);
    (void)addressesChanged;

    // Recalculate the layout for the whole image (and verify the ranges at
    // the start of the next round).
    assignAddresses();

    pass++;
  }
}

// The main function of the writer.
void Writer::run() {
  ScopedTimer t1(ctx.codeLayoutTimer);

  createImportTables();
  createSections();
  appendImportThunks();
  // Import thunks must be added before the Control Flow Guard tables are
  // added.
  createMiscChunks();
  createExportTable();
  mergeSections();
  removeUnusedSections();
  finalizeAddresses();
  removeEmptySections();
  assignOutputSectionIndices();
  setSectionPermissions();
  createSymbolAndStringTable();

  if (fileSize > UINT32_MAX)
    fatal("image size (" + Twine(fileSize) + ") " +
          "exceeds maximum allowable size (" + Twine(UINT32_MAX) + ")");

  openFile(config->outputFile);
  if (config->is64()) {
    writeHeader<pe32plus_header>();
  } else {
    writeHeader<pe32_header>();
  }
  writeSections();
  sortExceptionTable();

  // Fix up the alignment in the TLS Directory's characteristic field,
  // if a specific alignment value is needed
  if (tlsAlignment)
    fixTlsAlignment();

  t1.stop();

  if (!config->pdbPath.empty() && config->debug) {
    assert(buildId);
    createPDB(ctx, sectionTable, buildId->buildId);
  }
  writeBuildId();

  writeLLDMapFile(ctx);
  writeMapFile(ctx);

  if (errorCount())
    return;

  ScopedTimer t2(ctx.outputCommitTimer);
  if (auto e = buffer->commit())
    fatal("failed to write the output file: " + toString(std::move(e)));
}

static StringRef getOutputSectionName(StringRef name) {
  StringRef s = name.split('$').first;

  // Treat a later period as a separator for MinGW, for sections like
  // ".ctors.01234".
  return s.substr(0, s.find('.', 1));
}

// For /order.
static void sortBySectionOrder(std::vector<Chunk *> &chunks) {
  auto getPriority = [](const Chunk *c) {
    if (auto *sec = dyn_cast<SectionChunk>(c))
      if (sec->sym)
        return config->order.lookup(sec->sym->getName());
    return 0;
  };

  llvm::stable_sort(chunks, [=](const Chunk *a, const Chunk *b) {
    return getPriority(a) < getPriority(b);
  });
}

// Change the characteristics of existing PartialSections that belong to the
// section Name to Chars.
void Writer::fixPartialSectionChars(StringRef name, uint32_t chars) {
  for (auto it : partialSections) {
    PartialSection *pSec = it.second;
    StringRef curName = pSec->name;
    if (!curName.consume_front(name) ||
        (!curName.empty() && !curName.startswith("$")))
      continue;
    if (pSec->characteristics == chars)
      continue;
    PartialSection *destSec = createPartialSection(pSec->name, chars);
    destSec->chunks.insert(destSec->chunks.end(), pSec->chunks.begin(),
                           pSec->chunks.end());
    pSec->chunks.clear();
  }
}

// Sort concrete section chunks from GNU import libraries.
//
// GNU binutils doesn't use short import files, but instead produces import
// libraries that consist of object files, with section chunks for the
// .idata$* sections. These are linked just as regular static libraries. Each
// import library consists of one header object, one object file for every
// imported symbol, and one trailer object. In order for the .idata
// tables/lists to be formed correctly, the section chunks within each
// .idata$* section need to be grouped by library, and sorted alphabetically
// within each library (which makes sure the header comes first and the
// trailer last).
bool Writer::fixGnuImportChunks() {
  uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ;

  // Make sure all .idata$* section chunks are mapped as RDATA in order to
  // be sorted into the same sections as our own synthesized .idata chunks.
  fixPartialSectionChars(".idata", rdata);

  bool hasIdata = false;
  // Sort all .idata$* chunks, grouping chunks from the same library,
  // with alphabetical ordering of the object files within a library.
for (auto it : partialSections) { PartialSection *pSec = it.second; if (!pSec->name.startswith(".idata")) continue; if (!pSec->chunks.empty()) hasIdata = true; llvm::stable_sort(pSec->chunks, [&](Chunk *s, Chunk *t) { SectionChunk *sc1 = dyn_cast_or_null(s); SectionChunk *sc2 = dyn_cast_or_null(t); if (!sc1 || !sc2) { // if SC1, order them ascending. If SC2 or both null, // S is not less than T. return sc1 != nullptr; } // Make a string with "libraryname/objectfile" for sorting, achieving // both grouping by library and sorting of objects within a library, // at once. std::string key1 = (sc1->file->parentName + "/" + sc1->file->getName()).str(); std::string key2 = (sc2->file->parentName + "/" + sc2->file->getName()).str(); return key1 < key2; }); } return hasIdata; } // Add generated idata chunks, for imported symbols and DLLs, and a // terminator in .idata$2. void Writer::addSyntheticIdata() { uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; idata.create(); // Add the .idata content in the right section groups, to allow // chunks from other linked in object files to be grouped together. // See Microsoft PE/COFF spec 5.4 for details. auto add = [&](StringRef n, std::vector &v) { PartialSection *pSec = createPartialSection(n, rdata); pSec->chunks.insert(pSec->chunks.end(), v.begin(), v.end()); }; // The loader assumes a specific order of data. // Add each type in the correct order. add(".idata$2", idata.dirs); add(".idata$4", idata.lookups); add(".idata$5", idata.addresses); if (!idata.hints.empty()) add(".idata$6", idata.hints); add(".idata$7", idata.dllNames); } // Locate the first Chunk and size of the import directory list and the // IAT. void Writer::locateImportTables() { uint32_t rdata = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ; if (PartialSection *importDirs = findPartialSection(".idata$2", rdata)) { if (!importDirs->chunks.empty()) importTableStart = importDirs->chunks.front(); for (Chunk *c : importDirs->chunks) importTableSize += c->getSize(); } if (PartialSection *importAddresses = findPartialSection(".idata$5", rdata)) { if (!importAddresses->chunks.empty()) iatStart = importAddresses->chunks.front(); for (Chunk *c : importAddresses->chunks) iatSize += c->getSize(); } } // Return whether a SectionChunk's suffix (the dollar and any trailing // suffix) should be removed and sorted into the main suffixless // PartialSection. static bool shouldStripSectionSuffix(SectionChunk *sc, StringRef name) { // On MinGW, comdat groups are formed by putting the comdat group name // after the '$' in the section name. For .eh_frame$, that must // still be sorted before the .eh_frame trailer from crtend.o, thus just // strip the section name trailer. For other sections, such as // .tls$$ (where non-comdat .tls symbols are otherwise stored in // ".tls$"), they must be strictly sorted after .tls. And for the // hypothetical case of comdat .CRT$XCU, we definitely need to keep the // suffix for sorting. Thus, to play it safe, only strip the suffix for // the standard sections. 
if (!config->mingw) return false; if (!sc || !sc->isCOMDAT()) return false; return name.startswith(".text$") || name.startswith(".data$") || name.startswith(".rdata$") || name.startswith(".pdata$") || name.startswith(".xdata$") || name.startswith(".eh_frame$"); } void Writer::sortSections() { if (!config->callGraphProfile.empty()) { DenseMap order = computeCallGraphProfileOrder(ctx); for (auto it : order) { if (DefinedRegular *sym = it.first->sym) config->order[sym->getName()] = it.second; } } if (!config->order.empty()) for (auto it : partialSections) sortBySectionOrder(it.second->chunks); } // Create output section objects and add them to OutputSections. void Writer::createSections() { // First, create the builtin sections. const uint32_t data = IMAGE_SCN_CNT_INITIALIZED_DATA; const uint32_t bss = IMAGE_SCN_CNT_UNINITIALIZED_DATA; const uint32_t code = IMAGE_SCN_CNT_CODE; const uint32_t discardable = IMAGE_SCN_MEM_DISCARDABLE; const uint32_t r = IMAGE_SCN_MEM_READ; const uint32_t w = IMAGE_SCN_MEM_WRITE; const uint32_t x = IMAGE_SCN_MEM_EXECUTE; SmallDenseMap, OutputSection *> sections; auto createSection = [&](StringRef name, uint32_t outChars) { OutputSection *&sec = sections[{name, outChars}]; if (!sec) { sec = make(name, outChars); ctx.outputSections.push_back(sec); } return sec; }; // Try to match the section order used by link.exe. textSec = createSection(".text", code | r | x); createSection(".bss", bss | r | w); rdataSec = createSection(".rdata", data | r); buildidSec = createSection(".buildid", data | r); dataSec = createSection(".data", data | r | w); pdataSec = createSection(".pdata", data | r); idataSec = createSection(".idata", data | r); edataSec = createSection(".edata", data | r); didatSec = createSection(".didat", data | r); rsrcSec = createSection(".rsrc", data | r); relocSec = createSection(".reloc", data | discardable | r); ctorsSec = createSection(".ctors", data | r | w); dtorsSec = createSection(".dtors", data | r | w); // Then bin chunks by name and output characteristics. for (Chunk *c : ctx.symtab.getChunks()) { auto *sc = dyn_cast(c); if (sc && !sc->live) { if (config->verbose) sc->printDiscardedMessage(); continue; } StringRef name = c->getSectionName(); if (shouldStripSectionSuffix(sc, name)) name = name.split('$').first; if (name.startswith(".tls")) tlsAlignment = std::max(tlsAlignment, c->getAlignment()); PartialSection *pSec = createPartialSection(name, c->getOutputCharacteristics()); pSec->chunks.push_back(c); } fixPartialSectionChars(".rsrc", data | r); fixPartialSectionChars(".edata", data | r); // Even in non MinGW cases, we might need to link against GNU import // libraries. bool hasIdata = fixGnuImportChunks(); if (!idata.empty()) hasIdata = true; if (hasIdata) addSyntheticIdata(); sortSections(); if (hasIdata) locateImportTables(); // Then create an OutputSection for each section. // '$' and all following characters in input section names are // discarded when determining output section. So, .text$foo // contributes to .text, for example. See PE/COFF spec 3.2. for (auto it : partialSections) { PartialSection *pSec = it.second; StringRef name = getOutputSectionName(pSec->name); uint32_t outChars = pSec->characteristics; if (name == ".CRT") { // In link.exe, there is a special case for the I386 target where .CRT // sections are treated as if they have output characteristics DATA | R if // their characteristics are DATA | R | W. This implements the same // special case for all architectures. 
outChars = data | r; log("Processing section " + pSec->name + " -> " + name); sortCRTSectionChunks(pSec->chunks); } OutputSection *sec = createSection(name, outChars); for (Chunk *c : pSec->chunks) sec->addChunk(c); sec->addContributingPartialSection(pSec); } // Finally, move some output sections to the end. auto sectionOrder = [&](const OutputSection *s) { // Move DISCARDABLE (or non-memory-mapped) sections to the end of file // because the loader cannot handle holes. Stripping can remove other // discardable ones than .reloc, which is first of them (created early). - if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) + if (s->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) { + // Move discardable sections named .debug_ to the end, after other + // discardable sections. Stripping only removes the sections named + // .debug_* - thus try to avoid leaving holes after stripping. + if (s->name.startswith(".debug_")) + return 3; return 2; + } // .rsrc should come at the end of the non-discardable sections because its // size may change by the Win32 UpdateResources() function, causing // subsequent sections to move (see https://crbug.com/827082). if (s == rsrcSec) return 1; return 0; }; llvm::stable_sort(ctx.outputSections, [&](const OutputSection *s, const OutputSection *t) { return sectionOrder(s) < sectionOrder(t); }); } void Writer::createMiscChunks() { for (MergeChunk *p : ctx.mergeChunkInstances) { if (p) { p->finalizeContents(); rdataSec->addChunk(p); } } // Create thunks for locally-dllimported symbols. if (!ctx.symtab.localImportChunks.empty()) { for (Chunk *c : ctx.symtab.localImportChunks) rdataSec->addChunk(c); } // Create Debug Information Chunks OutputSection *debugInfoSec = config->mingw ? buildidSec : rdataSec; if (config->debug || config->repro || config->cetCompat) { debugDirectory = make(ctx, debugRecords, config->repro); debugDirectory->setAlignment(4); debugInfoSec->addChunk(debugDirectory); } if (config->debug) { // Make a CVDebugRecordChunk even when /DEBUG:CV is not specified. We // output a PDB no matter what, and this chunk provides the only means of // allowing a debugger to match a PDB and an executable. So we need it even // if we're ultimately not going to write CodeView data to the PDB. buildId = make(); debugRecords.push_back({COFF::IMAGE_DEBUG_TYPE_CODEVIEW, buildId}); } if (config->cetCompat) { debugRecords.push_back({COFF::IMAGE_DEBUG_TYPE_EX_DLLCHARACTERISTICS, make( IMAGE_DLL_CHARACTERISTICS_EX_CET_COMPAT)}); } // Align and add each chunk referenced by the debug data directory. for (std::pair r : debugRecords) { r.second->setAlignment(4); debugInfoSec->addChunk(r.second); } // Create SEH table. x86-only. if (config->safeSEH) createSEHTable(); // Create /guard:cf tables if requested. if (config->guardCF != GuardCFLevel::Off) createGuardCFTables(); if (config->autoImport) createRuntimePseudoRelocs(); if (config->mingw) insertCtorDtorSymbols(); } // Create .idata section for the DLL-imported symbol table. // The format of this section is inherently Windows-specific. // IdataContents class abstracted away the details for us, // so we just let it create chunks and add them to the section. void Writer::createImportTables() { // Initialize DLLOrder so that import entries are ordered in // the same order as in the command line. (That affects DLL // initialization order, and this ordering is MSVC-compatible.) 
for (ImportFile *file : ctx.importFileInstances) { if (!file->live) continue; std::string dll = StringRef(file->dllName).lower(); if (config->dllOrder.count(dll) == 0) config->dllOrder[dll] = config->dllOrder.size(); if (file->impSym && !isa(file->impSym)) fatal(toString(*file->impSym) + " was replaced"); DefinedImportData *impSym = cast_or_null(file->impSym); if (config->delayLoads.count(StringRef(file->dllName).lower())) { if (!file->thunkSym) fatal("cannot delay-load " + toString(file) + " due to import of data: " + toString(*impSym)); delayIdata.add(impSym); } else { idata.add(impSym); } } } void Writer::appendImportThunks() { if (ctx.importFileInstances.empty()) return; for (ImportFile *file : ctx.importFileInstances) { if (!file->live) continue; if (!file->thunkSym) continue; if (!isa(file->thunkSym)) fatal(toString(*file->thunkSym) + " was replaced"); DefinedImportThunk *thunk = cast(file->thunkSym); if (file->thunkLive) textSec->addChunk(thunk->getChunk()); } if (!delayIdata.empty()) { Defined *helper = cast(config->delayLoadHelper); delayIdata.create(ctx, helper); for (Chunk *c : delayIdata.getChunks()) didatSec->addChunk(c); for (Chunk *c : delayIdata.getDataChunks()) dataSec->addChunk(c); for (Chunk *c : delayIdata.getCodeChunks()) textSec->addChunk(c); } } void Writer::createExportTable() { if (!edataSec->chunks.empty()) { // Allow using a custom built export table from input object files, instead // of having the linker synthesize the tables. if (config->hadExplicitExports) warn("literal .edata sections override exports"); } else if (!config->exports.empty()) { for (Chunk *c : edata.chunks) edataSec->addChunk(c); } if (!edataSec->chunks.empty()) { edataStart = edataSec->chunks.front(); edataEnd = edataSec->chunks.back(); } // Warn on exported deleting destructor. for (auto e : config->exports) if (e.sym && e.sym->getName().startswith("??_G")) warn("export of deleting dtor: " + toString(*e.sym)); } void Writer::removeUnusedSections() { // Remove sections that we can be sure won't get content, to avoid // allocating space for their section headers. auto isUnused = [this](OutputSection *s) { if (s == relocSec) return false; // This section is populated later. // MergeChunks have zero size at this point, as their size is finalized // later. Only remove sections that have no Chunks at all. return s->chunks.empty(); }; llvm::erase_if(ctx.outputSections, isUnused); } // The Windows loader doesn't seem to like empty sections, // so we remove them if any. void Writer::removeEmptySections() { auto isEmpty = [](OutputSection *s) { return s->getVirtualSize() == 0; }; llvm::erase_if(ctx.outputSections, isEmpty); } void Writer::assignOutputSectionIndices() { // Assign final output section indices, and assign each chunk to its output // section. uint32_t idx = 1; for (OutputSection *os : ctx.outputSections) { os->sectionIndex = idx; for (Chunk *c : os->chunks) c->setOutputSectionIdx(idx); ++idx; } // Merge chunks are containers of chunks, so assign those an output section // too. 
for (MergeChunk *mc : ctx.mergeChunkInstances) if (mc) for (SectionChunk *sc : mc->sections) if (sc && sc->live) sc->setOutputSectionIdx(mc->getOutputSectionIdx()); } size_t Writer::addEntryToStringTable(StringRef str) { assert(str.size() > COFF::NameSize); size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field strtab.insert(strtab.end(), str.begin(), str.end()); strtab.push_back('\0'); return offsetOfEntry; } Optional Writer::createSymbol(Defined *def) { coff_symbol16 sym; switch (def->kind()) { case Symbol::DefinedAbsoluteKind: sym.Value = def->getRVA(); sym.SectionNumber = IMAGE_SYM_ABSOLUTE; break; case Symbol::DefinedSyntheticKind: // Relative symbols are unrepresentable in a COFF symbol table. return None; default: { // Don't write symbols that won't be written to the output to the symbol // table. Chunk *c = def->getChunk(); if (!c) return None; OutputSection *os = ctx.getOutputSection(c); if (!os) return None; sym.Value = def->getRVA() - os->getRVA(); sym.SectionNumber = os->sectionIndex; break; } } // Symbols that are runtime pseudo relocations don't point to the actual // symbol data itself (as they are imported), but points to the IAT entry // instead. Avoid emitting them to the symbol table, as they can confuse // debuggers. if (def->isRuntimePseudoReloc) return None; StringRef name = def->getName(); if (name.size() > COFF::NameSize) { sym.Name.Offset.Zeroes = 0; sym.Name.Offset.Offset = addEntryToStringTable(name); } else { memset(sym.Name.ShortName, 0, COFF::NameSize); memcpy(sym.Name.ShortName, name.data(), name.size()); } if (auto *d = dyn_cast(def)) { COFFSymbolRef ref = d->getCOFFSymbol(); sym.Type = ref.getType(); sym.StorageClass = ref.getStorageClass(); } else { sym.Type = IMAGE_SYM_TYPE_NULL; sym.StorageClass = IMAGE_SYM_CLASS_EXTERNAL; } sym.NumberOfAuxSymbols = 0; return sym; } void Writer::createSymbolAndStringTable() { // PE/COFF images are limited to 8 byte section names. Longer names can be // supported by writing a non-standard string table, but this string table is // not mapped at runtime and the long names will therefore be inaccessible. // link.exe always truncates section names to 8 bytes, whereas binutils always // preserves long section names via the string table. LLD adopts a hybrid // solution where discardable sections have long names preserved and // non-discardable sections have their names truncated, to ensure that any // section which is mapped at runtime also has its name mapped at runtime. for (OutputSection *sec : ctx.outputSections) { if (sec->name.size() <= COFF::NameSize) continue; if ((sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) == 0) continue; if (config->warnLongSectionNames) { warn("section name " + sec->name + " is longer than 8 characters and will use a non-standard string " "table"); } sec->setStringTableOff(addEntryToStringTable(sec->name)); } if (config->debugDwarf || config->debugSymtab) { for (ObjFile *file : ctx.objFileInstances) { for (Symbol *b : file->getSymbols()) { auto *d = dyn_cast_or_null(b); if (!d || d->writtenToSymtab) continue; d->writtenToSymtab = true; if (auto *dc = dyn_cast_or_null(d)) { COFFSymbolRef symRef = dc->getCOFFSymbol(); if (symRef.isSectionDefinition() || symRef.getStorageClass() == COFF::IMAGE_SYM_CLASS_LABEL) continue; } if (Optional sym = createSymbol(d)) outputSymtab.push_back(*sym); } } } if (outputSymtab.empty() && strtab.empty()) return; // We position the symbol table to be adjacent to the end of the last section. 
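// Illustratively, the final file layout is:
//   [headers][section 1]...[section N][symbol table][string table]
// The string table begins with a 32-bit size field that counts itself,
// which is why addEntryToStringTable biases offsets by 4.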
uint64_t fileOff = fileSize; pointerToSymbolTable = fileOff; fileOff += outputSymtab.size() * sizeof(coff_symbol16); fileOff += 4 + strtab.size(); fileSize = alignTo(fileOff, config->fileAlign); } void Writer::mergeSections() { if (!pdataSec->chunks.empty()) { firstPdata = pdataSec->chunks.front(); lastPdata = pdataSec->chunks.back(); } for (auto &p : config->merge) { StringRef toName = p.second; if (p.first == toName) continue; StringSet<> names; while (true) { if (!names.insert(toName).second) fatal("/merge: cycle found for section '" + p.first + "'"); auto i = config->merge.find(toName); if (i == config->merge.end()) break; toName = i->second; } OutputSection *from = findSection(p.first); OutputSection *to = findSection(toName); if (!from) continue; if (!to) { from->name = toName; continue; } to->merge(from); } } // Visits all sections to assign incremental, non-overlapping RVAs and // file offsets. void Writer::assignAddresses() { sizeOfHeaders = dosStubSize + sizeof(PEMagic) + sizeof(coff_file_header) + sizeof(data_directory) * numberOfDataDirectory + sizeof(coff_section) * ctx.outputSections.size(); sizeOfHeaders += config->is64() ? sizeof(pe32plus_header) : sizeof(pe32_header); sizeOfHeaders = alignTo(sizeOfHeaders, config->fileAlign); fileSize = sizeOfHeaders; // The first page is kept unmapped. uint64_t rva = alignTo(sizeOfHeaders, config->align); for (OutputSection *sec : ctx.outputSections) { if (sec == relocSec) addBaserels(); uint64_t rawSize = 0, virtualSize = 0; sec->header.VirtualAddress = rva; // If /FUNCTIONPADMIN is used, functions are padded in order to create a // hotpatchable image. const bool isCodeSection = (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) && (sec->header.Characteristics & IMAGE_SCN_MEM_READ) && (sec->header.Characteristics & IMAGE_SCN_MEM_EXECUTE); uint32_t padding = isCodeSection ? config->functionPadMin : 0; for (Chunk *c : sec->chunks) { if (padding && c->isHotPatchable()) virtualSize += padding; virtualSize = alignTo(virtualSize, c->getAlignment()); c->setRVA(rva + virtualSize); virtualSize += c->getSize(); if (c->hasData) rawSize = alignTo(virtualSize, config->fileAlign); } if (virtualSize > UINT32_MAX) error("section larger than 4 GiB: " + sec->name); sec->header.VirtualSize = virtualSize; sec->header.SizeOfRawData = rawSize; if (rawSize != 0) sec->header.PointerToRawData = fileSize; rva += alignTo(virtualSize, config->align); fileSize += alignTo(rawSize, config->fileAlign); } sizeOfImage = alignTo(rva, config->align); // Assign addresses to sections in MergeChunks. for (MergeChunk *mc : ctx.mergeChunkInstances) if (mc) mc->assignSubsectionRVAs(); } template void Writer::writeHeader() { // Write DOS header. For backwards compatibility, the first part of a PE/COFF // executable consists of an MS-DOS MZ executable. If the executable is run // under DOS, that program gets run (usually to just print an error message). // When run under Windows, the loader looks at AddressOfNewExeHeader and uses // the PE header instead. uint8_t *buf = buffer->getBufferStart(); auto *dos = reinterpret_cast(buf); buf += sizeof(dos_header); dos->Magic[0] = 'M'; dos->Magic[1] = 'Z'; dos->UsedBytesInTheLastPage = dosStubSize % 512; dos->FileSizeInPages = divideCeil(dosStubSize, 512); dos->HeaderSizeInParagraphs = sizeof(dos_header) / 16; dos->AddressOfRelocationTable = sizeof(dos_header); dos->AddressOfNewExeHeader = dosStubSize; // Write DOS program. 
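// (The stub is the classic program that prints "This program cannot be run
// in DOS mode." when the image is started under plain DOS.)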
memcpy(buf, dosProgram, sizeof(dosProgram)); buf += sizeof(dosProgram); // Write PE magic memcpy(buf, PEMagic, sizeof(PEMagic)); buf += sizeof(PEMagic); // Write COFF header auto *coff = reinterpret_cast(buf); buf += sizeof(*coff); coff->Machine = config->machine; coff->NumberOfSections = ctx.outputSections.size(); coff->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE; if (config->largeAddressAware) coff->Characteristics |= IMAGE_FILE_LARGE_ADDRESS_AWARE; if (!config->is64()) coff->Characteristics |= IMAGE_FILE_32BIT_MACHINE; if (config->dll) coff->Characteristics |= IMAGE_FILE_DLL; if (config->driverUponly) coff->Characteristics |= IMAGE_FILE_UP_SYSTEM_ONLY; if (!config->relocatable) coff->Characteristics |= IMAGE_FILE_RELOCS_STRIPPED; if (config->swaprunCD) coff->Characteristics |= IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP; if (config->swaprunNet) coff->Characteristics |= IMAGE_FILE_NET_RUN_FROM_SWAP; coff->SizeOfOptionalHeader = sizeof(PEHeaderTy) + sizeof(data_directory) * numberOfDataDirectory; // Write PE header auto *pe = reinterpret_cast(buf); buf += sizeof(*pe); pe->Magic = config->is64() ? PE32Header::PE32_PLUS : PE32Header::PE32; // If {Major,Minor}LinkerVersion is left at 0.0, then for some // reason signing the resulting PE file with Authenticode produces a // signature that fails to validate on Windows 7 (but is OK on 10). // Set it to 14.0, which is what VS2015 outputs, and which avoids // that problem. pe->MajorLinkerVersion = 14; pe->MinorLinkerVersion = 0; pe->ImageBase = config->imageBase; pe->SectionAlignment = config->align; pe->FileAlignment = config->fileAlign; pe->MajorImageVersion = config->majorImageVersion; pe->MinorImageVersion = config->minorImageVersion; pe->MajorOperatingSystemVersion = config->majorOSVersion; pe->MinorOperatingSystemVersion = config->minorOSVersion; pe->MajorSubsystemVersion = config->majorSubsystemVersion; pe->MinorSubsystemVersion = config->minorSubsystemVersion; pe->Subsystem = config->subsystem; pe->SizeOfImage = sizeOfImage; pe->SizeOfHeaders = sizeOfHeaders; if (!config->noEntry) { Defined *entry = cast(config->entry); pe->AddressOfEntryPoint = entry->getRVA(); // Pointer to thumb code must have the LSB set, so adjust it. 
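// For example, a Thumb-mode entry point at RVA 0x1000 is encoded as 0x1001;
// the processor ignores the low bit for addressing and treats it as the
// Thumb state flag.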
if (config->machine == ARMNT) pe->AddressOfEntryPoint |= 1; } pe->SizeOfStackReserve = config->stackReserve; pe->SizeOfStackCommit = config->stackCommit; pe->SizeOfHeapReserve = config->heapReserve; pe->SizeOfHeapCommit = config->heapCommit; if (config->appContainer) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_APPCONTAINER; if (config->driverWdm) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER; if (config->dynamicBase) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE; if (config->highEntropyVA) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA; if (!config->allowBind) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_BIND; if (config->nxCompat) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NX_COMPAT; if (!config->allowIsolation) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION; if (config->guardCF != GuardCFLevel::Off) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_GUARD_CF; if (config->integrityCheck) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY; if (setNoSEHCharacteristic || config->noSEH) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_NO_SEH; if (config->terminalServerAware) pe->DLLCharacteristics |= IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE; pe->NumberOfRvaAndSize = numberOfDataDirectory; if (textSec->getVirtualSize()) { pe->BaseOfCode = textSec->getRVA(); pe->SizeOfCode = textSec->getRawSize(); } pe->SizeOfInitializedData = getSizeOfInitializedData(); // Write data directory auto *dir = reinterpret_cast(buf); buf += sizeof(*dir) * numberOfDataDirectory; if (edataStart) { dir[EXPORT_TABLE].RelativeVirtualAddress = edataStart->getRVA(); dir[EXPORT_TABLE].Size = edataEnd->getRVA() + edataEnd->getSize() - edataStart->getRVA(); } if (importTableStart) { dir[IMPORT_TABLE].RelativeVirtualAddress = importTableStart->getRVA(); dir[IMPORT_TABLE].Size = importTableSize; } if (iatStart) { dir[IAT].RelativeVirtualAddress = iatStart->getRVA(); dir[IAT].Size = iatSize; } if (rsrcSec->getVirtualSize()) { dir[RESOURCE_TABLE].RelativeVirtualAddress = rsrcSec->getRVA(); dir[RESOURCE_TABLE].Size = rsrcSec->getVirtualSize(); } if (firstPdata) { dir[EXCEPTION_TABLE].RelativeVirtualAddress = firstPdata->getRVA(); dir[EXCEPTION_TABLE].Size = lastPdata->getRVA() + lastPdata->getSize() - firstPdata->getRVA(); } if (relocSec->getVirtualSize()) { dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = relocSec->getRVA(); dir[BASE_RELOCATION_TABLE].Size = relocSec->getVirtualSize(); } if (Symbol *sym = ctx.symtab.findUnderscore("_tls_used")) { if (Defined *b = dyn_cast(sym)) { dir[TLS_TABLE].RelativeVirtualAddress = b->getRVA(); dir[TLS_TABLE].Size = config->is64() ? 
sizeof(object::coff_tls_directory64) : sizeof(object::coff_tls_directory32); } } if (debugDirectory) { dir[DEBUG_DIRECTORY].RelativeVirtualAddress = debugDirectory->getRVA(); dir[DEBUG_DIRECTORY].Size = debugDirectory->getSize(); } if (Symbol *sym = ctx.symtab.findUnderscore("_load_config_used")) { if (auto *b = dyn_cast(sym)) { SectionChunk *sc = b->getChunk(); assert(b->getRVA() >= sc->getRVA()); uint64_t offsetInChunk = b->getRVA() - sc->getRVA(); if (!sc->hasData || offsetInChunk + 4 > sc->getSize()) fatal("_load_config_used is malformed"); ArrayRef secContents = sc->getContents(); uint32_t loadConfigSize = *reinterpret_cast(&secContents[offsetInChunk]); if (offsetInChunk + loadConfigSize > sc->getSize()) fatal("_load_config_used is too large"); dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = b->getRVA(); dir[LOAD_CONFIG_TABLE].Size = loadConfigSize; } } if (!delayIdata.empty()) { dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = delayIdata.getDirRVA(); dir[DELAY_IMPORT_DESCRIPTOR].Size = delayIdata.getDirSize(); } // Write section table for (OutputSection *sec : ctx.outputSections) { sec->writeHeaderTo(buf); buf += sizeof(coff_section); } sectionTable = ArrayRef( buf - ctx.outputSections.size() * sizeof(coff_section), buf); if (outputSymtab.empty() && strtab.empty()) return; coff->PointerToSymbolTable = pointerToSymbolTable; uint32_t numberOfSymbols = outputSymtab.size(); coff->NumberOfSymbols = numberOfSymbols; auto *symbolTable = reinterpret_cast( buffer->getBufferStart() + coff->PointerToSymbolTable); for (size_t i = 0; i != numberOfSymbols; ++i) symbolTable[i] = outputSymtab[i]; // Create the string table, it follows immediately after the symbol table. // The first 4 bytes is length including itself. buf = reinterpret_cast(&symbolTable[numberOfSymbols]); write32le(buf, strtab.size() + 4); if (!strtab.empty()) memcpy(buf + 4, strtab.data(), strtab.size()); } void Writer::openFile(StringRef path) { buffer = CHECK( FileOutputBuffer::create(path, fileSize, FileOutputBuffer::F_executable), "failed to open " + path); } void Writer::createSEHTable() { SymbolRVASet handlers; for (ObjFile *file : ctx.objFileInstances) { if (!file->hasSafeSEH()) error("/safeseh: " + file->getName() + " is not compatible with SEH"); markSymbolsForRVATable(file, file->getSXDataChunks(), handlers); } // Set the "no SEH" characteristic if there really were no handlers, or if // there is no load config object to point to the table of handlers. setNoSEHCharacteristic = handlers.empty() || !ctx.symtab.findUnderscore("_load_config_used"); maybeAddRVATable(std::move(handlers), "__safe_se_handler_table", "__safe_se_handler_count"); } // Add a symbol to an RVA set. Two symbols may have the same RVA, but an RVA set // cannot contain duplicates. Therefore, the set is uniqued by Chunk and the // symbol's offset into that Chunk. static void addSymbolToRVASet(SymbolRVASet &rvaSet, Defined *s) { Chunk *c = s->getChunk(); if (auto *sc = dyn_cast(c)) c = sc->repl; // Look through ICF replacement. uint32_t off = s->getRVA() - (c ? c->getRVA() : 0); rvaSet.insert({c, off}); } // Given a symbol, add it to the GFIDs table if it is a live, defined, function // symbol in an executable section. static void maybeAddAddressTakenFunction(SymbolRVASet &addressTakenSyms, Symbol *s) { if (!s) return; switch (s->kind()) { case Symbol::DefinedLocalImportKind: case Symbol::DefinedImportDataKind: // Defines an __imp_ pointer, so it is data, so it is ignored. 
break; case Symbol::DefinedCommonKind: // Common is always data, so it is ignored. break; case Symbol::DefinedAbsoluteKind: case Symbol::DefinedSyntheticKind: // Absolute is never code, synthetic generally isn't and usually isn't // determinable. break; case Symbol::LazyArchiveKind: case Symbol::LazyObjectKind: case Symbol::LazyDLLSymbolKind: case Symbol::UndefinedKind: // Undefined symbols resolve to zero, so they don't have an RVA. Lazy // symbols shouldn't have relocations. break; case Symbol::DefinedImportThunkKind: // Thunks are always code, include them. addSymbolToRVASet(addressTakenSyms, cast(s)); break; case Symbol::DefinedRegularKind: { // This is a regular, defined, symbol from a COFF file. Mark the symbol as // address taken if the symbol type is function and it's in an executable // section. auto *d = cast(s); if (d->getCOFFSymbol().getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) { SectionChunk *sc = dyn_cast(d->getChunk()); if (sc && sc->live && sc->getOutputCharacteristics() & IMAGE_SCN_MEM_EXECUTE) addSymbolToRVASet(addressTakenSyms, d); } break; } } } // Visit all relocations from all section contributions of this object file and // mark the relocation target as address-taken. static void markSymbolsWithRelocations(ObjFile *file, SymbolRVASet &usedSymbols) { for (Chunk *c : file->getChunks()) { // We only care about live section chunks. Common chunks and other chunks // don't generally contain relocations. SectionChunk *sc = dyn_cast(c); if (!sc || !sc->live) continue; for (const coff_relocation &reloc : sc->getRelocs()) { if (config->machine == I386 && reloc.Type == COFF::IMAGE_REL_I386_REL32) // Ignore relative relocations on x86. On x86_64 they can't be ignored // since they're also used to compute absolute addresses. continue; Symbol *ref = sc->file->getSymbol(reloc.SymbolTableIndex); maybeAddAddressTakenFunction(usedSymbols, ref); } } } // Create the guard function id table. This is a table of RVAs of all // address-taken functions. It is sorted and uniqued, just like the safe SEH // table. void Writer::createGuardCFTables() { SymbolRVASet addressTakenSyms; SymbolRVASet giatsRVASet; std::vector giatsSymbols; SymbolRVASet longJmpTargets; SymbolRVASet ehContTargets; for (ObjFile *file : ctx.objFileInstances) { // If the object was compiled with /guard:cf, the address taken symbols // are in .gfids$y sections, the longjmp targets are in .gljmp$y sections, // and ehcont targets are in .gehcont$y sections. If the object was not // compiled with /guard:cf, we assume there were no setjmp and ehcont // targets, and that all code symbols with relocations are possibly // address-taken. if (file->hasGuardCF()) { markSymbolsForRVATable(file, file->getGuardFidChunks(), addressTakenSyms); markSymbolsForRVATable(file, file->getGuardIATChunks(), giatsRVASet); getSymbolsFromSections(file, file->getGuardIATChunks(), giatsSymbols); markSymbolsForRVATable(file, file->getGuardLJmpChunks(), longJmpTargets); markSymbolsForRVATable(file, file->getGuardEHContChunks(), ehContTargets); } else { markSymbolsWithRelocations(file, addressTakenSyms); } } // Mark the image entry as address-taken. if (config->entry) maybeAddAddressTakenFunction(addressTakenSyms, config->entry); // Mark exported symbols in executable sections as address-taken. for (Export &e : config->exports) maybeAddAddressTakenFunction(addressTakenSyms, e.sym); // For each entry in the .giats table, check if it has a corresponding load // thunk (e.g. 
because the DLL that defines it will be delay-loaded) and, if // so, add the load thunk to the address taken (.gfids) table. for (Symbol *s : giatsSymbols) { if (auto *di = dyn_cast(s)) { if (di->loadThunkSym) addSymbolToRVASet(addressTakenSyms, di->loadThunkSym); } } // Ensure sections referenced in the gfid table are 16-byte aligned. for (const ChunkAndOffset &c : addressTakenSyms) if (c.inputChunk->getAlignment() < 16) c.inputChunk->setAlignment(16); maybeAddRVATable(std::move(addressTakenSyms), "__guard_fids_table", "__guard_fids_count"); // Add the Guard Address Taken IAT Entry Table (.giats). maybeAddRVATable(std::move(giatsRVASet), "__guard_iat_table", "__guard_iat_count"); // Add the longjmp target table unless the user told us not to. if (config->guardCF & GuardCFLevel::LongJmp) maybeAddRVATable(std::move(longJmpTargets), "__guard_longjmp_table", "__guard_longjmp_count"); // Add the ehcont target table unless the user told us not to. if (config->guardCF & GuardCFLevel::EHCont) maybeAddRVATable(std::move(ehContTargets), "__guard_eh_cont_table", "__guard_eh_cont_count", true); // Set __guard_flags, which will be used in the load config to indicate that // /guard:cf was enabled. uint32_t guardFlags = uint32_t(coff_guard_flags::CFInstrumented) | uint32_t(coff_guard_flags::HasFidTable); if (config->guardCF & GuardCFLevel::LongJmp) guardFlags |= uint32_t(coff_guard_flags::HasLongJmpTable); if (config->guardCF & GuardCFLevel::EHCont) guardFlags |= uint32_t(coff_guard_flags::HasEHContTable); Symbol *flagSym = ctx.symtab.findUnderscore("__guard_flags"); cast(flagSym)->setVA(guardFlags); } // Take a list of input sections containing symbol table indices and add those // symbols to a vector. The challenge is that symbol RVAs are not known and // depend on the table size, so we can't directly build a set of integers. void Writer::getSymbolsFromSections(ObjFile *file, ArrayRef symIdxChunks, std::vector &symbols) { for (SectionChunk *c : symIdxChunks) { // Skip sections discarded by linker GC. This comes up when a .gfids section // is associated with something like a vtable and the vtable is discarded. // In this case, the associated gfids section is discarded, and we don't // mark the virtual member functions as address-taken by the vtable. if (!c->live) continue; // Validate that the contents look like symbol table indices. ArrayRef data = c->getContents(); if (data.size() % 4 != 0) { warn("ignoring " + c->getSectionName() + " symbol table index section in object " + toString(file)); continue; } // Read each symbol table index and check if that symbol was included in the // final link. If so, add it to the vector of symbols. ArrayRef symIndices( reinterpret_cast(data.data()), data.size() / 4); ArrayRef objSymbols = file->getSymbols(); for (uint32_t symIndex : symIndices) { if (symIndex >= objSymbols.size()) { warn("ignoring invalid symbol table index in section " + c->getSectionName() + " in object " + toString(file)); continue; } if (Symbol *s = objSymbols[symIndex]) { if (s->isLive()) symbols.push_back(cast(s)); } } } } // Take a list of input sections containing symbol table indices and add those // symbols to an RVA table. 
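// Each such chunk's payload (.sxdata, .gfids$y, .giats$y, .gljmp$y,
// .gehcont$y) is a flat array of little-endian 32-bit symbol table indices,
// which is why getSymbolsFromSections rejects contents whose size is not a
// multiple of 4.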
void Writer::markSymbolsForRVATable(ObjFile *file, ArrayRef symIdxChunks, SymbolRVASet &tableSymbols) { std::vector syms; getSymbolsFromSections(file, symIdxChunks, syms); for (Symbol *s : syms) addSymbolToRVASet(tableSymbols, cast(s)); } // Replace the absolute table symbol with a synthetic symbol pointing to // tableChunk so that we can emit base relocations for it and resolve section // relative relocations. void Writer::maybeAddRVATable(SymbolRVASet tableSymbols, StringRef tableSym, StringRef countSym, bool hasFlag) { if (tableSymbols.empty()) return; NonSectionChunk *tableChunk; if (hasFlag) tableChunk = make(std::move(tableSymbols)); else tableChunk = make(std::move(tableSymbols)); rdataSec->addChunk(tableChunk); Symbol *t = ctx.symtab.findUnderscore(tableSym); Symbol *c = ctx.symtab.findUnderscore(countSym); replaceSymbol(t, t->getName(), tableChunk); cast(c)->setVA(tableChunk->getSize() / (hasFlag ? 5 : 4)); } // MinGW specific. Gather all relocations that are imported from a DLL even // though the code didn't expect it to, produce the table that the runtime // uses for fixing them up, and provide the synthetic symbols that the // runtime uses for finding the table. void Writer::createRuntimePseudoRelocs() { std::vector rels; for (Chunk *c : ctx.symtab.getChunks()) { auto *sc = dyn_cast(c); if (!sc || !sc->live) continue; sc->getRuntimePseudoRelocs(rels); } if (!config->pseudoRelocs) { // Not writing any pseudo relocs; if some were needed, error out and // indicate what required them. for (const RuntimePseudoReloc &rpr : rels) error("automatic dllimport of " + rpr.sym->getName() + " in " + toString(rpr.target->file) + " requires pseudo relocations"); return; } if (!rels.empty()) log("Writing " + Twine(rels.size()) + " runtime pseudo relocations"); PseudoRelocTableChunk *table = make(rels); rdataSec->addChunk(table); EmptyChunk *endOfList = make(); rdataSec->addChunk(endOfList); Symbol *headSym = ctx.symtab.findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST__"); Symbol *endSym = ctx.symtab.findUnderscore("__RUNTIME_PSEUDO_RELOC_LIST_END__"); replaceSymbol(headSym, headSym->getName(), table); replaceSymbol(endSym, endSym->getName(), endOfList); } // MinGW specific. // The MinGW .ctors and .dtors lists have sentinels at each end; // a (uintptr_t)-1 at the start and a (uintptr_t)0 at the end. // There's a symbol pointing to the start sentinel pointer, __CTOR_LIST__ // and __DTOR_LIST__ respectively. void Writer::insertCtorDtorSymbols() { AbsolutePointerChunk *ctorListHead = make(-1); AbsolutePointerChunk *ctorListEnd = make(0); AbsolutePointerChunk *dtorListHead = make(-1); AbsolutePointerChunk *dtorListEnd = make(0); ctorsSec->insertChunkAtStart(ctorListHead); ctorsSec->addChunk(ctorListEnd); dtorsSec->insertChunkAtStart(dtorListHead); dtorsSec->addChunk(dtorListEnd); Symbol *ctorListSym = ctx.symtab.findUnderscore("__CTOR_LIST__"); Symbol *dtorListSym = ctx.symtab.findUnderscore("__DTOR_LIST__"); replaceSymbol(ctorListSym, ctorListSym->getName(), ctorListHead); replaceSymbol(dtorListSym, dtorListSym->getName(), dtorListHead); } // Handles /section options to allow users to overwrite // section attributes. void Writer::setSectionPermissions() { for (auto &p : config->section) { StringRef name = p.first; uint32_t perm = p.second; for (OutputSection *sec : ctx.outputSections) if (sec->name == name) sec->setPermissions(perm); } } // Write section contents to a mmap'ed file. 
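// Chunks within a section are written in parallel; each chunk writes only at
// its own RVA-derived offset, so the writes cannot overlap.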
void Writer::writeSections() { // Record the number of sections to apply section index relocations // against absolute symbols. See applySecIdx in Chunks.cpp.. DefinedAbsolute::numOutputSections = ctx.outputSections.size(); uint8_t *buf = buffer->getBufferStart(); for (OutputSection *sec : ctx.outputSections) { uint8_t *secBuf = buf + sec->getFileOff(); // Fill gaps between functions in .text with INT3 instructions // instead of leaving as NUL bytes (which can be interpreted as // ADD instructions). if (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) memset(secBuf, 0xCC, sec->getRawSize()); parallelForEach(sec->chunks, [&](Chunk *c) { c->writeTo(secBuf + c->getRVA() - sec->getRVA()); }); } } void Writer::writeBuildId() { // There are two important parts to the build ID. // 1) If building with debug info, the COFF debug directory contains a // timestamp as well as a Guid and Age of the PDB. // 2) In all cases, the PE COFF file header also contains a timestamp. // For reproducibility, instead of a timestamp we want to use a hash of the // PE contents. if (config->debug) { assert(buildId && "BuildId is not set!"); // BuildId->BuildId was filled in when the PDB was written. } // At this point the only fields in the COFF file which remain unset are the // "timestamp" in the COFF file header, and the ones in the coff debug // directory. Now we can hash the file and write that hash to the various // timestamp fields in the file. StringRef outputFileData( reinterpret_cast(buffer->getBufferStart()), buffer->getBufferSize()); uint32_t timestamp = config->timestamp; uint64_t hash = 0; bool generateSyntheticBuildId = config->mingw && config->debug && config->pdbPath.empty(); if (config->repro || generateSyntheticBuildId) hash = xxHash64(outputFileData); if (config->repro) timestamp = static_cast(hash); if (generateSyntheticBuildId) { // For MinGW builds without a PDB file, we still generate a build id // to allow associating a crash dump to the executable. buildId->buildId->PDB70.CVSignature = OMF::Signature::PDB70; buildId->buildId->PDB70.Age = 1; memcpy(buildId->buildId->PDB70.Signature, &hash, 8); // xxhash only gives us 8 bytes, so put some fixed data in the other half. memcpy(&buildId->buildId->PDB70.Signature[8], "LLD PDB.", 8); } if (debugDirectory) debugDirectory->setTimeDateStamp(timestamp); uint8_t *buf = buffer->getBufferStart(); buf += dosStubSize + sizeof(PEMagic); object::coff_file_header *coffHeader = reinterpret_cast(buf); coffHeader->TimeDateStamp = timestamp; } // Sort .pdata section contents according to PE/COFF spec 5.5. void Writer::sortExceptionTable() { if (!firstPdata) return; // We assume .pdata contains function table entries only. 
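  // Entry layouts, matching the code below: on x86_64 an entry is
  // {BeginAddress, EndAddress, UnwindData} (12 bytes); on ARM and ARM64 it is
  // {BeginAddress, UnwindData} (8 bytes). Either way, entries are sorted by
  // BeginAddress.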
auto bufAddr = [&](Chunk *c) { OutputSection *os = ctx.getOutputSection(c); return buffer->getBufferStart() + os->getFileOff() + c->getRVA() - os->getRVA(); }; uint8_t *begin = bufAddr(firstPdata); uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize(); if (config->machine == AMD64) { struct Entry { ulittle32_t begin, end, unwind; }; if ((end - begin) % sizeof(Entry) != 0) { fatal("unexpected .pdata size: " + Twine(end - begin) + " is not a multiple of " + Twine(sizeof(Entry))); } parallelSort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); return; } if (config->machine == ARMNT || config->machine == ARM64) { struct Entry { ulittle32_t begin, unwind; }; if ((end - begin) % sizeof(Entry) != 0) { fatal("unexpected .pdata size: " + Twine(end - begin) + " is not a multiple of " + Twine(sizeof(Entry))); } parallelSort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); return; } lld::errs() << "warning: don't know how to handle .pdata.\n"; } // The CRT section contains, among other things, the array of function // pointers that initialize every global variable that is not trivially // constructed. The CRT calls them one after the other prior to invoking // main(). // // As per C++ spec, 3.6.2/2.3, // "Variables with ordered initialization defined within a single // translation unit shall be initialized in the order of their definitions // in the translation unit" // // It is therefore critical to sort the chunks containing the function // pointers in the order that they are listed in the object file (top to // bottom), otherwise global objects might not be initialized in the // correct order. void Writer::sortCRTSectionChunks(std::vector &chunks) { auto sectionChunkOrder = [](const Chunk *a, const Chunk *b) { auto sa = dyn_cast(a); auto sb = dyn_cast(b); assert(sa && sb && "Non-section chunks in CRT section!"); StringRef sAObj = sa->file->mb.getBufferIdentifier(); StringRef sBObj = sb->file->mb.getBufferIdentifier(); return sAObj == sBObj && sa->getSectionNumber() < sb->getSectionNumber(); }; llvm::stable_sort(chunks, sectionChunkOrder); if (config->verbose) { for (auto &c : chunks) { auto sc = dyn_cast(c); log(" " + sc->file->mb.getBufferIdentifier().str() + ", SectionID: " + Twine(sc->getSectionNumber())); } } } OutputSection *Writer::findSection(StringRef name) { for (OutputSection *sec : ctx.outputSections) if (sec->name == name) return sec; return nullptr; } uint32_t Writer::getSizeOfInitializedData() { uint32_t res = 0; for (OutputSection *s : ctx.outputSections) if (s->header.Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA) res += s->getRawSize(); return res; } // Add base relocations to .reloc section. void Writer::addBaserels() { if (!config->relocatable) return; relocSec->chunks.clear(); std::vector v; for (OutputSection *sec : ctx.outputSections) { if (sec->header.Characteristics & IMAGE_SCN_MEM_DISCARDABLE) continue; // Collect all locations for base relocations. for (Chunk *c : sec->chunks) c->getBaserels(&v); // Add the addresses to .reloc section. if (!v.empty()) addBaserelBlocks(v); v.clear(); } } // Add addresses to .reloc section. Note that addresses are grouped by page. 
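// Worked example: with 4 KiB pages, relocations at RVAs 0x1004, 0x1FF8 and
// 0x2010 produce two blocks: one for page 0x1000 holding the first two
// entries, and one for page 0x2000 holding the third.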
void Writer::addBaserelBlocks(std::vector &v) { const uint32_t mask = ~uint32_t(pageSize - 1); uint32_t page = v[0].rva & mask; size_t i = 0, j = 1; for (size_t e = v.size(); j < e; ++j) { uint32_t p = v[j].rva & mask; if (p == page) continue; relocSec->addChunk(make(page, &v[i], &v[0] + j)); i = j; page = p; } if (i == j) return; relocSec->addChunk(make(page, &v[i], &v[0] + j)); } PartialSection *Writer::createPartialSection(StringRef name, uint32_t outChars) { PartialSection *&pSec = partialSections[{name, outChars}]; if (pSec) return pSec; pSec = make(name, outChars); return pSec; } PartialSection *Writer::findPartialSection(StringRef name, uint32_t outChars) { auto it = partialSections.find({name, outChars}); if (it != partialSections.end()) return it->second; return nullptr; } void Writer::fixTlsAlignment() { Defined *tlsSym = dyn_cast_or_null(ctx.symtab.findUnderscore("_tls_used")); if (!tlsSym) return; OutputSection *sec = ctx.getOutputSection(tlsSym->getChunk()); assert(sec && tlsSym->getRVA() >= sec->getRVA() && "no output section for _tls_used"); uint8_t *secBuf = buffer->getBufferStart() + sec->getFileOff(); uint64_t tlsOffset = tlsSym->getRVA() - sec->getRVA(); uint64_t directorySize = config->is64() ? sizeof(object::coff_tls_directory64) : sizeof(object::coff_tls_directory32); if (tlsOffset + directorySize > sec->getRawSize()) fatal("_tls_used sym is malformed"); if (config->is64()) { object::coff_tls_directory64 *tlsDir = reinterpret_cast(&secBuf[tlsOffset]); tlsDir->setAlignment(tlsAlignment); } else { object::coff_tls_directory32 *tlsDir = reinterpret_cast(&secBuf[tlsOffset]); tlsDir->setAlignment(tlsAlignment); } } diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h index f86ee5a14874..d3fa3b879125 100644 --- a/llvm/include/llvm/Analysis/InlineCost.h +++ b/llvm/include/llvm/Analysis/InlineCost.h @@ -1,325 +1,330 @@ //===- InlineCost.h - Cost analysis for inliner -----------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements heuristics for inlining decisions. // //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_INLINECOST_H #define LLVM_ANALYSIS_INLINECOST_H #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/InlineModelFeatureMaps.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include #include namespace llvm { class BlockFrequencyInfo; class CallBase; class DataLayout; class Function; class ProfileSummaryInfo; class TargetTransformInfo; class TargetLibraryInfo; namespace InlineConstants { // Various thresholds used by inline cost analysis. /// Use when optsize (-Os) is specified. const int OptSizeThreshold = 50; /// Use when minsize (-Oz) is specified. const int OptMinSizeThreshold = 5; /// Use when -O3 is specified. const int OptAggressiveThreshold = 250; // Various magic constants used to adjust heuristics. const int InstrCost = 5; const int IndirectCallThreshold = 100; const int LoopPenalty = 25; const int LastCallToStaticBonus = 15000; const int ColdccPenalty = 2000; /// Do not inline functions which allocate this many bytes on the stack /// when the caller is recursive. 
const unsigned TotalAllocaSizeRecursiveCaller = 1024;
/// Do not inline dynamic allocas that have been constant propagated to be
/// static allocas above this amount in bytes.
const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
+
+const char FunctionInlineCostMultiplierAttributeName[] =
+    "function-inline-cost-multiplier";
} // namespace InlineConstants

// The cost-benefit pair computed by cost-benefit analysis.
class CostBenefitPair {
public:
  CostBenefitPair(APInt Cost, APInt Benefit) : Cost(Cost), Benefit(Benefit) {}

  const APInt &getCost() const { return Cost; }
  const APInt &getBenefit() const { return Benefit; }

private:
  APInt Cost;
  APInt Benefit;
};

/// Represents the cost of inlining a function.
///
/// This supports special values for functions which should "always" or
/// "never" be inlined. Otherwise, the cost represents a unitless amount;
/// smaller values increase the likelihood of the function being inlined.
///
/// Objects of this type also provide the adjusted threshold for inlining
/// based on the information available for a particular callsite. They can be
/// directly tested to determine if inlining should occur given the cost and
/// threshold for this cost metric.
class InlineCost {
  enum SentinelValues { AlwaysInlineCost = INT_MIN, NeverInlineCost = INT_MAX };

  /// The estimated cost of inlining this callsite.
  int Cost = 0;

  /// The adjusted threshold against which this cost was computed.
  int Threshold = 0;

  /// Must be set for Always and Never instances.
  const char *Reason = nullptr;

  /// The cost-benefit pair computed by cost-benefit analysis.
  Optional<CostBenefitPair> CostBenefit = None;

  // Trivial constructor, interesting logic in the factory functions below.
  InlineCost(int Cost, int Threshold, const char *Reason = nullptr,
             Optional<CostBenefitPair> CostBenefit = None)
      : Cost(Cost), Threshold(Threshold), Reason(Reason),
        CostBenefit(CostBenefit) {
    assert((isVariable() || Reason) &&
           "Reason must be provided for Never or Always");
  }

public:
  static InlineCost get(int Cost, int Threshold) {
    assert(Cost > AlwaysInlineCost && "Cost crosses sentinel value");
    assert(Cost < NeverInlineCost && "Cost crosses sentinel value");
    return InlineCost(Cost, Threshold);
  }
  static InlineCost getAlways(const char *Reason,
                              Optional<CostBenefitPair> CostBenefit = None) {
    return InlineCost(AlwaysInlineCost, 0, Reason, CostBenefit);
  }
  static InlineCost getNever(const char *Reason,
                             Optional<CostBenefitPair> CostBenefit = None) {
    return InlineCost(NeverInlineCost, 0, Reason, CostBenefit);
  }

  /// Test whether the inline cost is low enough for inlining.
  explicit operator bool() const { return Cost < Threshold; }

  bool isAlways() const { return Cost == AlwaysInlineCost; }
  bool isNever() const { return Cost == NeverInlineCost; }
  bool isVariable() const { return !isAlways() && !isNever(); }

  /// Get the inline cost estimate.
  /// It is an error to call this on an "always" or "never" InlineCost.
  int getCost() const {
    assert(isVariable() && "Invalid access of InlineCost");
    return Cost;
  }

  /// Get the threshold against which the cost was computed
  int getThreshold() const {
    assert(isVariable() && "Invalid access of InlineCost");
    return Threshold;
  }

  /// Get the cost-benefit pair which was computed by cost-benefit analysis
  Optional<CostBenefitPair> getCostBenefit() const { return CostBenefit; }

  /// Get the reason of Always or Never.
  const char *getReason() const {
    assert((Reason || isVariable()) &&
           "InlineCost reason must be set for Always or Never");
    return Reason;
  }

  /// Get the cost delta from the threshold for inlining.
  /// Only valid if the cost is of the variable kind. Returns a negative
  /// value if the cost is too high to inline.
  int getCostDelta() const { return Threshold - getCost(); }
};

/// InlineResult is basically true or false. For false results the message
/// describes a reason.
class InlineResult {
  const char *Message = nullptr;
  InlineResult(const char *Message = nullptr) : Message(Message) {}

public:
  static InlineResult success() { return {}; }
  static InlineResult failure(const char *Reason) {
    return InlineResult(Reason);
  }
  bool isSuccess() const { return Message == nullptr; }
  const char *getFailureReason() const {
    assert(!isSuccess() &&
           "getFailureReason should only be called in failure cases");
    return Message;
  }
};

/// Thresholds to tune inline cost analysis. The inline cost analysis decides
/// the condition to apply a threshold and applies it. Otherwise,
/// DefaultThreshold is used. If a threshold is Optional, it is applied only
/// when it has a valid value. Typically, users of inline cost analysis
/// obtain an InlineParams object through one of the \c getInlineParams methods
/// and pass it to \c getInlineCost. Some specialized versions of inliner
/// (such as the pre-inliner) might have custom logic to compute \c InlineParams
/// object.
struct InlineParams {
  /// The default threshold to start with for a callee.
  int DefaultThreshold = -1;

  /// Threshold to use for callees with inline hint.
  Optional<int> HintThreshold;

  /// Threshold to use for cold callees.
  Optional<int> ColdThreshold;

  /// Threshold to use when the caller is optimized for size.
  Optional<int> OptSizeThreshold;

  /// Threshold to use when the caller is optimized for minsize.
  Optional<int> OptMinSizeThreshold;

  /// Threshold to use when the callsite is considered hot.
  Optional<int> HotCallSiteThreshold;

  /// Threshold to use when the callsite is considered hot relative to function
  /// entry.
  Optional<int> LocallyHotCallSiteThreshold;

  /// Threshold to use when the callsite is considered cold.
  Optional<int> ColdCallSiteThreshold;

  /// Compute inline cost even when the cost has exceeded the threshold.
  Optional<bool> ComputeFullInlineCost;

  /// Indicate whether we should allow inline deferral.
  Optional<bool> EnableDeferral;

  /// Indicate whether we allow inlining for recursive call.
  Optional<bool> AllowRecursiveCall = false;
};

+Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind);
+
/// Generate the parameters to tune the inline cost analysis based only on the
/// commandline options.
InlineParams getInlineParams();

/// Generate the parameters to tune the inline cost analysis based on command
/// line options. If -inline-threshold option is not explicitly passed,
/// \p Threshold is used as the default threshold.
InlineParams getInlineParams(int Threshold);

/// Generate the parameters to tune the inline cost analysis based on command
/// line options. If -inline-threshold option is not explicitly passed,
/// the default threshold is computed from \p OptLevel and \p SizeOptLevel.
/// An \p OptLevel value above 3 is considered an aggressive optimization mode.
/// \p SizeOptLevel of 1 corresponds to the -Os flag and 2 corresponds to
/// the -Oz flag.
InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel);

/// Return the cost associated with a callsite, including parameter passing
/// and the call/return instruction.
int getCallsiteCost(CallBase &Call, const DataLayout &DL);

/// Get an InlineCost object representing the cost of inlining this
/// callsite.
///
/// Note that a default threshold is passed into this function. This threshold
/// could be modified based on callsite's properties and only costs below this
/// new threshold are computed with any accuracy. The new threshold can be
/// used to bound the computation necessary to determine whether the cost is
/// sufficiently low to warrant inlining.
///
/// Also note that calling this function *dynamically* computes the cost of
/// inlining the callsite. It is an expensive, heavyweight call.
InlineCost
getInlineCost(CallBase &Call, const InlineParams &Params,
              TargetTransformInfo &CalleeTTI,
              function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
              function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
              function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
              ProfileSummaryInfo *PSI = nullptr,
              OptimizationRemarkEmitter *ORE = nullptr);

/// Get an InlineCost with the callee explicitly specified.
/// This allows you to calculate the cost of inlining a function via a
/// pointer. This behaves exactly as the version with no explicit callee
/// parameter in all other respects.
//
InlineCost
getInlineCost(CallBase &Call, Function *Callee, const InlineParams &Params,
              TargetTransformInfo &CalleeTTI,
              function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
              function_ref<const TargetLibraryInfo &(Function &)> GetTLI,
              function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
              ProfileSummaryInfo *PSI = nullptr,
              OptimizationRemarkEmitter *ORE = nullptr);

/// Returns InlineResult::success() if the call site should be always inlined
/// because of user directives, and the inlining is viable. Returns
/// InlineResult::failure() if the inlining may never happen because of user
/// directives or incompatibilities detectable without needing callee traversal.
/// Otherwise returns None, meaning that inlining should be decided based on
/// other criteria (e.g. cost modeling).
Optional<InlineResult> getAttributeBasedInliningDecision(
    CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI,
    function_ref<const TargetLibraryInfo &(Function &)> GetTLI);

/// Get the cost estimate ignoring thresholds. This is similar to getInlineCost
/// when passed InlineParams::ComputeFullInlineCost, or a non-null ORE. It
/// uses default InlineParams otherwise.
/// Contrary to getInlineCost, which makes a threshold-based final evaluation of
/// should/shouldn't inline, captured in InlineResult, getInliningCostEstimate
/// returns:
/// - None, if the inlining cannot happen (is illegal)
/// - an integer, representing the cost.
Optional<int> getInliningCostEstimate(
    CallBase &Call, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
    ProfileSummaryInfo *PSI = nullptr,
    OptimizationRemarkEmitter *ORE = nullptr);

/// Get the expanded cost features. The features are returned unconditionally,
/// even if inlining is impossible.
Optional<InlineCostFeatures> getInliningCostFeatures(
    CallBase &Call, TargetTransformInfo &CalleeTTI,
    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
    function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
    ProfileSummaryInfo *PSI = nullptr,
    OptimizationRemarkEmitter *ORE = nullptr);

/// Minimal filter to detect invalid constructs for inlining.
InlineResult isInlineViable(Function &Callee);

// This pass is used to annotate instructions during the inline process for
// debugging and analysis. The main purpose of the pass is to see and test
// inliner's decisions when creating new optimizations to InlineCost.
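// For example, it can be exercised with something like
//   opt -passes='print<inline-cost>' -disable-output input.ll
// to dump per-instruction cost annotations (the exact pass-registry spelling
// may vary across LLVM versions).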
struct InlineCostAnnotationPrinterPass
    : PassInfoMixin<InlineCostAnnotationPrinterPass> {
  raw_ostream &OS;

public:
  explicit InlineCostAnnotationPrinterPass(raw_ostream &OS) : OS(OS) {}
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
};
} // namespace llvm

#endif
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index d6228700aa9a..4d6874f784ef 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -1,567 +1,568 @@
//===-- Scalar.h - Scalar Transformations -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This header file defines prototypes for accessor functions that expose passes
// in the Scalar transformations library.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_SCALAR_H
#define LLVM_TRANSFORMS_SCALAR_H

#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
#include <functional>

namespace llvm {

class Function;
class FunctionPass;
class ModulePass;
class Pass;

//===----------------------------------------------------------------------===//
//
// AlignmentFromAssumptions - Use assume intrinsics to set load/store
// alignments.
//
FunctionPass *createAlignmentFromAssumptionsPass();

//===----------------------------------------------------------------------===//
//
// AnnotationRemarks - Emit remarks for !annotation metadata.
//
FunctionPass *createAnnotationRemarksLegacyPass();

//===----------------------------------------------------------------------===//
//
// SCCP - Sparse conditional constant propagation.
//
FunctionPass *createSCCPPass();

//===----------------------------------------------------------------------===//
//
// RedundantDbgInstElimination - This pass removes redundant dbg intrinsics
// without modifying the CFG of the function. It is a FunctionPass.
//
Pass *createRedundantDbgInstEliminationPass();

//===----------------------------------------------------------------------===//
//
// DeadCodeElimination - This pass is more powerful than DeadInstElimination,
// because it is worklist-driven and can potentially revisit instructions when
// their users become dead, to eliminate chains of dead
// computations.
//
FunctionPass *createDeadCodeEliminationPass();

//===----------------------------------------------------------------------===//
//
// DeadStoreElimination - This pass deletes stores that are post-dominated by
// must-aliased stores and are not loaded or used between the stores.
//
FunctionPass *createDeadStoreEliminationPass();

//===----------------------------------------------------------------------===//
//
// CallSiteSplitting - This pass splits call sites based on their known
// argument values.
FunctionPass *createCallSiteSplittingPass();

//===----------------------------------------------------------------------===//
//
// AggressiveDCE - This pass uses the SSA based Aggressive DCE algorithm. This
// algorithm assumes instructions are dead until proven otherwise, which makes
// it more successful at removing non-obviously dead instructions.
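// (Informally: in a chain like "%a = mul ...; %b = add %a, ..." where %b's
// result is never used, the whole chain is deleted bottom-up as each
// instruction's users die.)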
//
FunctionPass *createAggressiveDCEPass();

//===----------------------------------------------------------------------===//
//
// GuardWidening - An optimization over the @llvm.experimental.guard intrinsic
// that (optimistically) combines multiple guards into one to have fewer checks
// at runtime.
//
FunctionPass *createGuardWideningPass();

//===----------------------------------------------------------------------===//
//
// LoopGuardWidening - Analogous to the GuardWidening pass, but restricted to a
// single loop at a time for use within a LoopPassManager. Desired effect is
// to widen guards into preheader or a single guard within loop if that's not
// possible.
//
Pass *createLoopGuardWideningPass();

//===----------------------------------------------------------------------===//
//
// BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to
// remove computations of dead bits.
//
FunctionPass *createBitTrackingDCEPass();

//===----------------------------------------------------------------------===//
//
// SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
//
FunctionPass *createSROAPass();

//===----------------------------------------------------------------------===//
//
// InductiveRangeCheckElimination - Transform loops to elide range checks on
// linear functions of the induction variable.
//
Pass *createInductiveRangeCheckEliminationPass();

//===----------------------------------------------------------------------===//
//
// InductionVariableSimplify - Transform induction variables in a program to all
// use a single canonical induction variable per loop.
//
Pass *createIndVarSimplifyPass();

//===----------------------------------------------------------------------===//
//
// LICM - This pass is a loop invariant code motion and memory promotion pass.
//
Pass *createLICMPass();
Pass *createLICMPass(unsigned LicmMssaOptCap,
-                     unsigned LicmMssaNoAccForPromotionCap);
+                     unsigned LicmMssaNoAccForPromotionCap,
+                     bool AllowSpeculation);

//===----------------------------------------------------------------------===//
//
// LoopSink - This pass sinks invariants from preheader to loop body where
// frequency is lower than loop preheader.
//
Pass *createLoopSinkPass();

//===----------------------------------------------------------------------===//
//
// LoopPredication - This pass does loop predication on guards.
//
Pass *createLoopPredicationPass();

//===----------------------------------------------------------------------===//
//
// LoopInterchange - This pass interchanges loops to provide more
// cache-friendly memory access patterns.
//
Pass *createLoopInterchangePass();

//===----------------------------------------------------------------------===//
//
// LoopFlatten - This pass flattens nested loops into a single loop.
//
FunctionPass *createLoopFlattenPass();

//===----------------------------------------------------------------------===//
//
// LoopStrengthReduce - This pass strength-reduces GEP instructions that use
// a loop's canonical induction variable as one of their indices.
//
Pass *createLoopStrengthReducePass();

//===----------------------------------------------------------------------===//
//
// LoopUnswitch - This pass is a simple loop unswitching pass.
//
Pass *createLoopUnswitchPass(bool OptimizeForSize = false,
                             bool hasBranchDivergence = false);

//===----------------------------------------------------------------------===//
//
// LoopInstSimplify - This pass simplifies instructions in a loop's body.
//
Pass *createLoopInstSimplifyPass();

//===----------------------------------------------------------------------===//
//
// LoopUnroll - This pass is a simple loop unrolling pass.
//
Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
                           bool ForgetAllSCEV = false, int Threshold = -1,
                           int Count = -1, int AllowPartial = -1,
                           int Runtime = -1, int UpperBound = -1,
                           int AllowPeeling = -1);

// Create an unrolling pass for full unrolling that uses exact trip count only
// and also does peeling.
Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
                                 bool ForgetAllSCEV = false);

//===----------------------------------------------------------------------===//
//
// LoopUnrollAndJam - This pass is a simple loop unroll and jam pass.
//
Pass *createLoopUnrollAndJamPass(int OptLevel = 2);

//===----------------------------------------------------------------------===//
//
// LoopReroll - This pass is a simple loop rerolling pass.
//
Pass *createLoopRerollPass();

//===----------------------------------------------------------------------===//
//
// LoopRotate - This pass is a simple loop rotating pass.
//
Pass *createLoopRotatePass(int MaxHeaderSize = -1, bool PrepareForLTO = false);

//===----------------------------------------------------------------------===//
//
// LoopIdiom - This pass recognizes and replaces idioms in loops.
//
Pass *createLoopIdiomPass();

//===----------------------------------------------------------------------===//
//
// LoopVersioningLICM - This pass is a loop versioning pass for LICM.
//
Pass *createLoopVersioningLICMPass();

//===----------------------------------------------------------------------===//
//
// DemoteRegisterToMemoryPass - This pass is used to demote registers to memory
// references. It basically undoes the PromoteMemoryToRegister pass to make CFG
// hacking easier.
//
FunctionPass *createDemoteRegisterToMemoryPass();
extern char &DemoteRegisterToMemoryID;

//===----------------------------------------------------------------------===//
//
// Reassociate - This pass reassociates commutative expressions in an order that
// is designed to promote better constant propagation, GCSE, LICM, PRE...
//
// For example:  4 + (x + 5)  ->  x + (4 + 5)
//
FunctionPass *createReassociatePass();

//===----------------------------------------------------------------------===//
//
// JumpThreading - Thread control through multi-pred/multi-succ blocks where
// some preds always go to some succ. If FreezeSelectCond is true, unfold the
// condition of a select whose result feeds a branch. Thresholds other than
// minus one override the internal BB duplication default threshold.
//
FunctionPass *createJumpThreadingPass(bool FreezeSelectCond = false,
                                      int Threshold = -1);

//===----------------------------------------------------------------------===//
//
// DFAJumpThreading - When a switch statement inside a loop is used to
// implement a deterministic finite automaton, we can jump-thread the switch
// statement, reducing the number of conditional jumps.
//
FunctionPass *createDFAJumpThreadingPass();

//===----------------------------------------------------------------------===//
//
// CFGSimplification - Merge basic blocks, eliminate unreachable blocks,
// simplify terminator instructions, convert switches to lookup tables, etc.
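// (For instance, a switch over a dense case range whose arms only select
// constants can be rewritten into a load from a constant lookup table.)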
//
FunctionPass *createCFGSimplificationPass(
    SimplifyCFGOptions Options = SimplifyCFGOptions(),
    std::function<bool(const Function &)> Ftor = nullptr);

//===----------------------------------------------------------------------===//
//
// FlattenCFG - flatten CFG, reduce number of conditional branches by using
// parallel-and and parallel-or mode, etc...
//
FunctionPass *createFlattenCFGPass();

//===----------------------------------------------------------------------===//
//
// CFG Structurization - Remove irreducible control flow
//
///
/// When \p SkipUniformRegions is true the structurizer will not structurize
/// regions that only contain uniform branches.
Pass *createStructurizeCFGPass(bool SkipUniformRegions = false);

//===----------------------------------------------------------------------===//
//
// TailCallElimination - This pass eliminates call instructions to the current
// function which occur immediately before return instructions.
//
FunctionPass *createTailCallEliminationPass();

//===----------------------------------------------------------------------===//
//
// EarlyCSE - This pass performs a simple and fast CSE pass over the dominator
// tree.
//
FunctionPass *createEarlyCSEPass(bool UseMemorySSA = false);

//===----------------------------------------------------------------------===//
//
// GVNHoist - This pass performs a simple and fast GVN pass over the dominator
// tree to hoist common expressions from sibling branches.
//
FunctionPass *createGVNHoistPass();

//===----------------------------------------------------------------------===//
//
// GVNSink - This pass uses an "inverted" value numbering to decide the
// similarity of expressions and sinks similar expressions into successors.
//
FunctionPass *createGVNSinkPass();

//===----------------------------------------------------------------------===//
//
// MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
// are hoisted into the header, while stores sink into the footer.
//
FunctionPass *createMergedLoadStoreMotionPass(bool SplitFooterBB = false);

//===----------------------------------------------------------------------===//
//
// GVN - This pass performs global value numbering and redundant load
// elimination simultaneously.
//
FunctionPass *createNewGVNPass();

//===----------------------------------------------------------------------===//
//
// DivRemPairs - Hoist/decompose integer division and remainder instructions.
//
FunctionPass *createDivRemPairsPass();

//===----------------------------------------------------------------------===//
//
// MemCpyOpt - This pass performs optimizations related to eliminating memcpy
// calls and/or combining multiple stores into memset's.
//
FunctionPass *createMemCpyOptPass();

//===----------------------------------------------------------------------===//
//
// LoopDeletion - This pass performs DCE of non-infinite loops that it
// can prove are dead.
//
Pass *createLoopDeletionPass();

//===----------------------------------------------------------------------===//
//
// ConstantHoisting - This pass prepares a function for expensive constants.
//
FunctionPass *createConstantHoistingPass();

//===----------------------------------------------------------------------===//
//
// ConstraintElimination - This pass eliminates conditions based on found
// constraints.
//===----------------------------------------------------------------------===//
//
// ConstraintElimination - This pass eliminates conditions based on found
// constraints.
//
FunctionPass *createConstraintEliminationPass();

//===----------------------------------------------------------------------===//
//
// Sink - Code Sinking
//
FunctionPass *createSinkingPass();

//===----------------------------------------------------------------------===//
//
// LowerAtomic - Lower atomic intrinsics to non-atomic form
//
Pass *createLowerAtomicPass();

//===----------------------------------------------------------------------===//
//
// LowerGuardIntrinsic - Lower guard intrinsics to normal control flow.
//
Pass *createLowerGuardIntrinsicPass();

//===----------------------------------------------------------------------===//
//
// LowerMatrixIntrinsics - Lower matrix intrinsics to vector operations.
//
Pass *createLowerMatrixIntrinsicsPass();

//===----------------------------------------------------------------------===//
//
// LowerMatrixIntrinsicsMinimal - Lower matrix intrinsics to vector operations
// (lightweight, does not require extra analysis)
//
Pass *createLowerMatrixIntrinsicsMinimalPass();

//===----------------------------------------------------------------------===//
//
// LowerWidenableCondition - Lower widenable condition to i1 true.
//
Pass *createLowerWidenableConditionPass();

//===----------------------------------------------------------------------===//
//
// MergeICmps - Merge integer comparison chains into a memcmp
//
Pass *createMergeICmpsLegacyPass();

//===----------------------------------------------------------------------===//
//
// ValuePropagation - Propagate CFG-derived value information
//
Pass *createCorrelatedValuePropagationPass();

//===----------------------------------------------------------------------===//
//
// InferAddressSpaces - Modify users of addrspacecast instructions with values
// in the source address space if using the destination address space is slower
// on the target. If AddressSpace is left to its default value, it will be
// obtained from the TargetTransformInfo.
//
FunctionPass *createInferAddressSpacesPass(unsigned AddressSpace = ~0u);
extern char &InferAddressSpacesID;

//===----------------------------------------------------------------------===//
//
// LowerExpectIntrinsics - Removes llvm.expect intrinsics and creates
// "block_weights" metadata.
FunctionPass *createLowerExpectIntrinsicPass();

//===----------------------------------------------------------------------===//
//
// LowerConstantIntrinsics - Expand any remaining llvm.objectsize and
// llvm.is.constant intrinsic calls, even for the unknown cases.
//
FunctionPass *createLowerConstantIntrinsicsPass();

//===----------------------------------------------------------------------===//
//
// PartiallyInlineLibCalls - Tries to inline the fast path of library
// calls such as sqrt.
//
FunctionPass *createPartiallyInlineLibCallsPass();

//===----------------------------------------------------------------------===//
//
// SeparateConstOffsetFromGEP - Split GEPs for better CSE
//
FunctionPass *createSeparateConstOffsetFromGEPPass(bool LowerGEP = false);

//===----------------------------------------------------------------------===//
//
// SpeculativeExecution - Aggressively hoist instructions to enable
// speculative execution on targets where branches are expensive.
//
FunctionPass *createSpeculativeExecutionPass();
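/// A source-level sketch of the hoisting SpeculativeExecution performs (all
/// names here are illustrative):
/// \code
///   // Before: the side-effect-free multiply is guarded by a branch.
///   if (cond) y = a * b;
///   // After: the multiply is hoisted and executed speculatively; the
///   // branch now only selects whether its result is used.
///   int t = a * b;
///   if (cond) y = t;
/// \endcode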
// Same as createSpeculativeExecutionPass, but does nothing unless
// TargetTransformInfo::hasBranchDivergence() is true.
FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass();

//===----------------------------------------------------------------------===//
//
// StraightLineStrengthReduce - This pass strength-reduces certain
// instruction patterns in straight-line code.
//
FunctionPass *createStraightLineStrengthReducePass();

//===----------------------------------------------------------------------===//
//
// PlaceSafepoints - Rewrite any IR calls to gc.statepoints and insert any
// safepoint polls (method entry, backedge) that might be required. This pass
// does not generate explicit relocation sequences - that's handled by
// RewriteStatepointsForGC which can be run at an arbitrary point in the pass
// order following this pass.
//
FunctionPass *createPlaceSafepointsPass();

//===----------------------------------------------------------------------===//
//
// RewriteStatepointsForGC - Rewrite any gc.statepoints which do not yet have
// explicit relocations to include explicit relocations.
//
ModulePass *createRewriteStatepointsForGCLegacyPass();

//===----------------------------------------------------------------------===//
//
// Float2Int - Demote floats to ints where possible.
//
FunctionPass *createFloat2IntPass();

//===----------------------------------------------------------------------===//
//
// NaryReassociate - Simplify n-ary operations by reassociation.
//
FunctionPass *createNaryReassociatePass();

//===----------------------------------------------------------------------===//
//
// LoopDistribute - Distribute loops.
//
FunctionPass *createLoopDistributePass();

//===----------------------------------------------------------------------===//
//
// LoopFuse - Fuse loops.
//
FunctionPass *createLoopFusePass();

//===----------------------------------------------------------------------===//
//
// LoopLoadElimination - Perform loop-aware load elimination.
//
FunctionPass *createLoopLoadEliminationPass();

//===----------------------------------------------------------------------===//
//
// LoopVersioning - Perform loop multi-versioning.
//
FunctionPass *createLoopVersioningPass();

//===----------------------------------------------------------------------===//
//
// LoopDataPrefetch - Perform data prefetching in loops.
//
FunctionPass *createLoopDataPrefetchPass();

//===----------------------------------------------------------------------===//

ModulePass *createNameAnonGlobalPass();
ModulePass *createCanonicalizeAliasesPass();

//===----------------------------------------------------------------------===//
//
// LibCallsShrinkWrap - Shrink-wraps a call to a function if the result is not
// used.
//
FunctionPass *createLibCallsShrinkWrapPass();

//===----------------------------------------------------------------------===//
//
// LoopSimplifyCFG - This pass performs basic CFG simplification on loops,
// primarily to help other loop passes.
//
Pass *createLoopSimplifyCFGPass();

//===----------------------------------------------------------------------===//
//
// WarnMissedTransformations - This pass emits warnings for leftover forced
// transformations.
//
Pass *createWarnMissedTransformationsPass();
//===----------------------------------------------------------------------===//
//
// This pass does instruction simplification on each
// instruction in a function.
//
FunctionPass *createInstSimplifyLegacyPass();

//===----------------------------------------------------------------------===//
//
// createScalarizeMaskedMemIntrinPass - Replace masked load, store, gather
// and scatter intrinsics with scalar code when the target doesn't support
// them.
//
FunctionPass *createScalarizeMaskedMemIntrinLegacyPass();

} // End llvm namespace

#endif

diff --git a/llvm/include/llvm/Transforms/Scalar/LICM.h b/llvm/include/llvm/Transforms/Scalar/LICM.h
index 751f75c0ccb2..503c8792d309 100644
--- a/llvm/include/llvm/Transforms/Scalar/LICM.h
+++ b/llvm/include/llvm/Transforms/Scalar/LICM.h
@@ -1,78 +1,86 @@
//===- LICM.h - Loop Invariant Code Motion Pass -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs loop invariant code motion, attempting to remove as much
// code from the body of a loop as possible. It does this by either hoisting
// code into the preheader block, or by sinking code to the exit blocks if it
// is safe. This pass also promotes must-aliased memory locations in the loop
// to live in registers, thus hoisting and sinking "invariant" loads and
// stores.
//
// This pass uses alias analysis for two purposes:
//
//  1. Moving loop invariant loads and calls out of loops. If we can determine
//     that a load or call inside of a loop never aliases anything stored to,
//     we can hoist it or sink it like any other instruction.
//  2. Scalar Promotion of Memory - If there is a store instruction inside of
//     the loop, we try to move the store to happen AFTER the loop instead of
//     inside of the loop. This can only happen if a few conditions are true:
//       A. The pointer stored through is loop invariant
//       B. There are no stores or loads in the loop which _may_ alias the
//          pointer. There are no calls in the loop which mod/ref the pointer.
//     If these conditions are true, we can promote the loads and stores of the
//     pointer in the loop to use a temporary alloca'd variable. We then use
//     the SSAUpdater to construct the appropriate SSA form for the value.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_SCALAR_LICM_H
#define LLVM_TRANSFORMS_SCALAR_LICM_H

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"

namespace llvm {

extern cl::opt<unsigned> SetLicmMssaOptCap;
extern cl::opt<unsigned> SetLicmMssaNoAccForPromotionCap;
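/// A source-level illustration of the scalar promotion described in the file
/// header above, assuming no other access in the loop may alias *p (a sketch
/// only; the names are illustrative):
/// \code
///   // Before promotion: a load and a store of *p on every iteration.
///   for (int i = 0; i < n; ++i)
///     *p += i;
///   // After promotion: *p lives in a temporary for the loop's duration.
///   int t = *p;
///   for (int i = 0; i < n; ++i)
///     t += i;
///   *p = t;
/// \endcode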
/// Performs Loop Invariant Code Motion Pass.
class LICMPass : public PassInfoMixin<LICMPass> {
  unsigned LicmMssaOptCap;
  unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;

public:
  LICMPass()
      : LicmMssaOptCap(SetLicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
-  LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(true) {}
+  LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
+           bool LicmAllowSpeculation)
      : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
};

/// Performs LoopNest Invariant Code Motion Pass.
class LNICMPass : public PassInfoMixin<LNICMPass> {
  unsigned LicmMssaOptCap;
  unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;

public:
  LNICMPass()
      : LicmMssaOptCap(SetLicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
-  LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(true) {}
+  LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
+           bool LicmAllowSpeculation)
      : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
  PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,
                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
};

} // end namespace llvm

#endif // LLVM_TRANSFORMS_SCALAR_LICM_H
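/// Example for the hunk above: constructing LICM with explicit caps and
/// speculative hoisting disabled via the new three-argument constructor (a
/// sketch; the cap values are illustrative, not the defaults):
/// \code
///   LICMPass P(/*LicmMssaOptCap=*/100, /*LicmMssaNoAccForPromotionCap=*/250,
///              /*LicmAllowSpeculation=*/false);
/// \endcode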
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 3a712d78df67..134f8bcfd888 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -1,534 +1,539 @@
//===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some loop transformation utilities.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
#define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H

#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

namespace llvm {

template <typename T> class DomTreeNodeBase;
using DomTreeNode = DomTreeNodeBase<BasicBlock>;
class AAResults;
class AliasSet;
class AliasSetTracker;
class BasicBlock;
class BlockFrequencyInfo;
class ICFLoopSafetyInfo;
class IRBuilderBase;
class Loop;
class LoopInfo;
class MemoryAccess;
class MemorySSA;
class MemorySSAUpdater;
class OptimizationRemarkEmitter;
class PredIteratorCache;
class ScalarEvolution;
class SCEV;
class SCEVExpander;
class TargetLibraryInfo;
class LPPassManager;
class Instruction;
struct RuntimeCheckingPtrGroup;
typedef std::pair<const RuntimeCheckingPtrGroup *,
                  const RuntimeCheckingPtrGroup *>
    RuntimePointerCheck;

template <typename T> class Optional;
template <typename T, unsigned N> class SmallSetVector;
template <typename T, unsigned N> class SmallVector;
template <typename T> class SmallVectorImpl;
template <typename T, unsigned N> class SmallPriorityWorklist;

BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   MemorySSAUpdater *MSSAU, bool PreserveLCSSA);

/// Ensure that all exit blocks of the loop are dedicated exits.
///
/// For any loop exit block with non-loop predecessors, we split the loop
/// predecessors to use a dedicated loop exit block. We update the dominator
/// tree and loop info if provided, and will preserve LCSSA if requested.
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                             MemorySSAUpdater *MSSAU, bool PreserveLCSSA);

/// Ensures LCSSA form for every instruction from the Worklist in the scope of
/// the innermost containing loop.
///
/// For each given instruction which has uses outside of the loop, an LCSSA PHI
/// node is inserted and the uses outside the loop are rewritten to use this
/// node.
///
/// LoopInfo and DominatorTree are required and, since the routine makes no
/// changes to the CFG, preserved.
///
/// Returns true if any modifications are made.
///
/// This function may introduce unused PHI nodes. If \p PHIsToRemove is not
/// nullptr, those are added to it (before removing, the caller has to check if
/// they still do not have any uses). Otherwise the PHIs are directly removed.
bool formLCSSAForInstructions(
    SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT,
    const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder,
    SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr);

/// Put loop into LCSSA form.
///
/// Looks at all instructions in the loop which have uses outside of the
/// current loop. For each, an LCSSA PHI node is inserted and the uses outside
/// the loop are rewritten to use this node. Sub-loops must be in LCSSA form
/// already.
///
/// LoopInfo and DominatorTree are required and preserved.
///
/// If ScalarEvolution is passed in, it will be preserved.
///
/// Returns true if any modifications are made to the loop.
bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
               ScalarEvolution *SE);

/// Put a loop nest into LCSSA form.
///
/// This recursively forms LCSSA for a loop nest.
///
/// LoopInfo and DominatorTree are required and preserved.
///
/// If ScalarEvolution is passed in, it will be preserved.
///
/// Returns true if any modifications are made to the loop.
bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
                          ScalarEvolution *SE);

/// Flags controlling how much is checked when sinking or hoisting
/// instructions.
/// The number of memory accesses in the loop (and whether there
/// are too many) is determined in the constructors when using MemorySSA.
class SinkAndHoistLICMFlags {
public:
  // Explicitly set limits.
  SinkAndHoistLICMFlags(unsigned LicmMssaOptCap,
                        unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
                        Loop *L = nullptr, MemorySSA *MSSA = nullptr);
  // Use default limits.
  SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr,
                        MemorySSA *MSSA = nullptr);

  void setIsSink(bool B) { IsSink = B; }
  bool getIsSink() { return IsSink; }
  bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; }
  bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; }
  void incrementClobberingCalls() { ++LicmMssaOptCounter; }

protected:
  bool NoOfMemAccTooLarge = false;
  unsigned LicmMssaOptCounter = 0;
  unsigned LicmMssaOptCap;
  unsigned LicmMssaNoAccForPromotionCap;
  bool IsSink;
};

/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in
/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
/// uses before definitions, allowing us to sink a loop body in one pass without
/// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as
/// arguments. Diagnostics are emitted via \p ORE. It returns changed status.
/// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when
/// this function is called by \p sinkRegionForLoopNest.
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                BlockFrequencyInfo *, TargetLibraryInfo *,
                TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater *,
                ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
                OptimizationRemarkEmitter *, Loop *OutermostLoop = nullptr);

/// Call sinkRegion on loops contained within the specified loop
/// in order from innermost to outermost.
bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
                           DominatorTree *, BlockFrequencyInfo *,
                           TargetLibraryInfo *, TargetTransformInfo *, Loop *,
                           MemorySSAUpdater *, ICFLoopSafetyInfo *,
                           SinkAndHoistLICMFlags &,
                           OptimizationRemarkEmitter *);

/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
/// first order w.r.t the DominatorTree. This allows us to visit definitions
/// before uses, allowing us to hoist a loop body in one pass without iteration.
/// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as arguments.
/// Diagnostics are emitted via \p ORE. It returns changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                 BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
                 MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
-                SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool);
+                SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool,
+                bool AllowSpeculation);
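/// Example: building flags for a MemorySSA-backed hoisting run over a loop
/// \p L with the explicit-limit constructor above (a sketch; \p L and
/// \p MSSA are assumed to exist and the cap values are illustrative):
/// \code
///   SinkAndHoistLICMFlags Flags(/*LicmMssaOptCap=*/100,
///                               /*LicmMssaNoAccForPromotionCap=*/250,
///                               /*IsSink=*/false, &L, &MSSA);
/// \endcode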
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead.
/// The function requires a bunch of prerequisites to be present:
///   - The loop needs to be in LCSSA form
///   - The loop needs to have a Preheader
///   - A unique dedicated exit block must exist
///
/// This also updates the relevant analysis information in \p DT, \p SE, \p LI
/// and \p MSSA if pointers to those are provided.
/// It also updates the loop PM if an updater struct is provided.
void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
                    LoopInfo *LI, MemorySSA *MSSA = nullptr);

/// Remove the backedge of the specified loop. Handles loop nests and general
/// loop structures subject to the precondition that the loop has no parent
/// loop and has a single latch block. Preserves all listed analyses.
void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
                       LoopInfo &LI, MemorySSA *MSSA);

/// Try to promote memory values to scalars by sinking stores out of
/// the loop and moving loads to before the loop. We do this by looping over
/// the stores in the loop, looking for stores to Must pointers which are
/// loop invariant. It takes a set of must-alias values, Loop exit blocks
/// vector, loop exit blocks insertion point vector, PredIteratorCache,
/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
/// of the loop and loop safety information as arguments.
/// Diagnostics are emitted via \p ORE. It returns changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
bool promoteLoopAccessesToScalars(
    const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
    SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
    PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
    Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
-    OptimizationRemarkEmitter *);
+    OptimizationRemarkEmitter *, bool AllowSpeculation);

/// Does a BFS from a given node to all of its children inside a given loop.
/// The returned vector of nodes includes the starting point.
SmallVector<DomTreeNode *, 16> collectChildrenInLoop(DomTreeNode *N,
                                                     const Loop *CurLoop);

/// Returns the instructions that use values defined in the loop.
SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);

/// Find a combination of metadata ("llvm.loop.vectorize.width" and
/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct
/// an ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be
/// found then None is returned.
Optional<ElementCount>
getOptionalElementCountLoopAttribute(const Loop *TheLoop);

/// Create a new loop identifier for a loop created from a loop transformation.
///
/// @param OrigLoopID The loop ID of the loop before the transformation.
/// @param FollowupAttrs List of attribute names that contain attributes to be
///                      added to the new loop ID.
/// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited
///                                  from the original loop. The following values
///                                  are considered:
///        nullptr   : Inherit all attributes from @p OrigLoopID.
///        ""        : Do not inherit any attribute from @p OrigLoopID; only use
///                    those specified by a followup attribute.
///        "<prefix>": Inherit all attributes except those which start with
///                    <prefix>; commonly used to remove metadata for the
///                    applied transformation.
/// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return
///                  None.
///
/// @return The loop ID for the after-transformation loop.
/// The following values can be returned:
///
///   None         : No followup attribute was found; it is up to the
///                  transformation to choose attributes that make sense.
///   @p OrigLoopID: The original identifier can be reused.
///   nullptr      : The new loop has no attributes.
///   MDNode*      : A new unique loop identifier.
Optional<MDNode *>
makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs,
                   const char *InheritOptionsAttrsPrefix = "",
                   bool AlwaysNew = false);

/// Look for the loop attribute that disables all transformation heuristics.
bool hasDisableAllTransformsHint(const Loop *L);

/// Look for the loop attribute that disables the LICM transformation
/// heuristics.
bool hasDisableLICMTransformsHint(const Loop *L);

/// The mode sets how eagerly a transformation should be applied.
enum TransformationMode {
  /// The pass can use heuristics to determine whether a transformation should
  /// be applied.
  TM_Unspecified,

  /// The transformation should be applied without considering a cost model.
  TM_Enable,

  /// The transformation should not be applied.
  TM_Disable,

  /// Force is a flag and should not be used alone.
  TM_Force = 0x04,

  /// The transformation was directed by the user, e.g. by a #pragma in
  /// the source code. If the transformation could not be applied, a
  /// warning should be emitted.
  TM_ForcedByUser = TM_Enable | TM_Force,

  /// The transformation must not be applied. For instance, `#pragma clang loop
  /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
  /// general loop metadata, it must not be dropped. Most passes should not
  /// behave differently under TM_Disable and TM_SuppressedByUser.
  TM_SuppressedByUser = TM_Disable | TM_Force
};

/// @{
/// Get the mode for LLVM's supported loop transformations.
TransformationMode hasUnrollTransformation(const Loop *L);
TransformationMode hasUnrollAndJamTransformation(const Loop *L);
TransformationMode hasVectorizeTransformation(const Loop *L);
TransformationMode hasDistributeTransformation(const Loop *L);
TransformationMode hasLICMVersioningTransformation(const Loop *L);
/// @}

/// Set the string \p MDString into the loop metadata, keeping other values
/// intact. If the string is already present, update its value if it differs.
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
                             unsigned V = 0);

/// Returns a loop's estimated trip count based on branch weight metadata.
/// In addition, if \p EstimatedLoopInvocationWeight is not null, it is
/// initialized with the weight of the loop's latch leading to the exit.
/// Returns 0 when the count is estimated to be 0, or None when a meaningful
/// estimate cannot be made.
Optional<unsigned>
getLoopEstimatedTripCount(Loop *L,
                          unsigned *EstimatedLoopInvocationWeight = nullptr);

/// Set a loop's branch weight metadata to reflect that the loop has \p
/// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits
/// through the latch. Returns true if the metadata is successfully updated,
/// false otherwise. Note that the loop must have a latch block which controls
/// the loop exit in order to succeed.
bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
                               unsigned EstimatedLoopInvocationWeight);

/// Check that the inner loop (L) backedge count is known to be invariant on
/// all iterations of its outer loop. If the loop has no parent, this is
/// trivially true.
bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE);
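/// Example: halving a loop's estimated trip count, e.g. after a 2x unroll,
/// with the two helpers above (a sketch; \p L is assumed to be a Loop*):
/// \code
///   unsigned ExitWeight = 0;
///   if (Optional<unsigned> TC = getLoopEstimatedTripCount(L, &ExitWeight))
///     setLoopEstimatedTripCount(L, *TC / 2, ExitWeight);
/// \endcode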
/// Helper to consistently add the set of standard passes to a loop pass's \c
/// AnalysisUsage.
///
/// All loop passes should call this as part of implementing their \c
/// getAnalysisUsage.
void getLoopAnalysisUsage(AnalysisUsage &AU);

/// Returns true if it is legal to hoist or sink this instruction disregarding
/// the possible introduction of faults. Reasoning about potential faulting
/// instructions is the responsibility of the caller since it is challenging to
/// do efficiently from within this routine.
/// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the
/// target executes at most once per execution of the loop body. This is used
/// to assess the legality of duplicating atomic loads. Generally, this is
/// true when moving out of loop and not true when moving into loops.
/// If \p ORE is set use it to emit optimization remarks.
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
                        Loop *CurLoop, AliasSetTracker *CurAST,
                        MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
                        SinkAndHoistLICMFlags *LICMFlags = nullptr,
                        OptimizationRemarkEmitter *ORE = nullptr);

/// Returns the comparison predicate used when expanding a min/max reduction.
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);

/// See RecurrenceDescriptor::isSelectCmpPattern for a description of the
/// pattern we are trying to match. In this pattern we are only ever selecting
/// between two values: 1) an initial PHI start value, and 2) a loop invariant
/// value. This function uses \p LoopExitInst to determine 2), which we then use
/// to select between \p Left and \p Right. Any lane value in \p Left that
/// matches 2) will be merged into \p Right.
Value *createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK,
                         Value *Left, Value *Right);

/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
/// The Builder's fast-math-flags must be set to propagate the expected values.
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
                      Value *Right);

/// Generates an ordered vector reduction using extracts to reduce the value.
Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
                           unsigned Op, RecurKind MinMaxKind = RecurKind::None);

/// Generates a vector reduction using shufflevectors to reduce the value.
/// Fast-math-flags are propagated using the IRBuilder's setting.
Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
                           RecurKind MinMaxKind = RecurKind::None);

/// Create a target reduction of the given vector. The reduction operation
/// is described by the \p Opcode parameter. min/max reductions require
/// additional information supplied in \p RdxKind.
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
/// Fast-math-flags are propagated using the IRBuilder's setting.
Value *createSimpleTargetReduction(IRBuilderBase &B,
                                   const TargetTransformInfo *TTI, Value *Src,
                                   RecurKind RdxKind);

/// Create a target reduction of the given vector \p Src for a reduction of the
/// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. The reduction operation
/// is described by \p Desc.
Value *createSelectCmpTargetReduction(IRBuilderBase &B,
                                      const TargetTransformInfo *TTI,
                                      Value *Src,
                                      const RecurrenceDescriptor &Desc,
                                      PHINode *OrigPhi);
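/// Example: emitting a fast float max reduction of a vector value \p Src with
/// createSimpleTargetReduction above (a sketch; \p B is an assumed IRBuilder
/// and \p TTI an assumed TargetTransformInfo pointer; FMF setup is shown
/// because the helper propagates the builder's fast-math flags):
/// \code
///   B.setFastMathFlags(FastMathFlags::getFast());
///   Value *Max = createSimpleTargetReduction(B, TTI, Src, RecurKind::FMax);
/// \endcode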
/// Create a generic target reduction using a recurrence descriptor \p Desc.
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
/// Fast-math-flags are propagated using the RecurrenceDescriptor.
Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
                             const RecurrenceDescriptor &Desc, Value *Src,
                             PHINode *OrigPhi = nullptr);

/// Create an ordered reduction intrinsic using the given recurrence
/// descriptor \p Desc.
Value *createOrderedReduction(IRBuilderBase &B,
                              const RecurrenceDescriptor &Desc, Value *Src,
                              Value *Start);

/// Get the intersection (logical and) of all of the potential IR flags
/// of each scalar operation (VL) that will be converted into a vector (I).
/// If OpValue is non-null, we only consider operations similar to OpValue
/// when intersecting.
/// Flag set: NSW, NUW, exact, and all of fast-math.
void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);

/// Returns true if we can prove that \p S is defined and always negative in
/// loop \p L.
bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);

/// Returns true if we can prove that \p S is defined and always non-negative
/// in loop \p L.
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
                              ScalarEvolution &SE);

/// Returns true if \p S is defined and never is equal to signed/unsigned max.
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                       bool Signed);

/// Returns true if \p S is defined and never is equal to signed/unsigned min.
bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                       bool Signed);

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

/// If the final value of any expressions that are recurrent in the loop can
/// be computed, substitute the exit values from the loop into any instructions
/// outside of the loop that use the final values of the current expressions.
/// Return the number of loop exit values that have been replaced, and the
/// corresponding phi node will be added to DeadInsts.
int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
                          ScalarEvolution *SE, const TargetTransformInfo *TTI,
                          SCEVExpander &Rewriter, DominatorTree *DT,
                          ReplaceExitVal ReplaceExitValue,
                          SmallVector<WeakTrackingVH, 16> &DeadInsts);

/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
/// \p OrigLoop and the following distribution of \p OrigLoop iterations among
/// \p UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that
/// reflect TC/UF iterations, and \p RemainderLoop receives weights that
/// reflect the remaining TC%UF iterations.
///
/// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p
/// RemainderLoop, in which case weights for \p OrigLoop are updated
/// accordingly. Note also that behavior is undefined if \p UnrolledLoop and
/// \p RemainderLoop are equal. \p UF must be greater than zero.
/// If \p OrigLoop has no profile info associated, nothing happens.
///
/// This utility may be useful for such optimizations as the unroller and the
/// vectorizer, as it's a typical transformation for them.
void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
                                  Loop *RemainderLoop, uint64_t UF);

/// Utility that implements appending of loops onto a worklist given a range.
/// We want to process loops in postorder, but the worklist is a LIFO data
/// structure, so we append to it in *reverse* postorder.
/// For trees, a preorder traversal is a viable reverse postorder, so we
/// actually append using a preorder walk algorithm.
template <typename RangeT>
void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist<Loop *, 4> &);
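/// Example: seeding a worklist so that loops pop in postorder (innermost
/// first), per the ordering discussion above, using the LoopInfo overload
/// declared below (a sketch; \p LI is an assumed LoopInfo reference):
/// \code
///   SmallPriorityWorklist<Loop *, 4> Worklist;
///   appendLoopsToWorklist(LI, Worklist);
///   while (!Worklist.empty()) {
///     Loop *L = Worklist.pop_back_val();
///     // ... process L ...
///   }
/// \endcode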
/// Utility that implements appending of loops onto a worklist given a range.
/// It has the same behavior as appendLoopsToWorklist, but assumes the range of
/// loops has already been reversed, so it processes loops in the given order.
template <typename RangeT>
void appendReversedLoopsToWorklist(RangeT &&,
                                   SmallPriorityWorklist<Loop *, 4> &);

/// Utility that implements appending of loops onto a worklist given LoopInfo.
/// Calls the templated utility taking a Range of loops, handing it the Loops
/// in LoopInfo, iterated in reverse. This is because the loops are stored in
/// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling,
/// loop deletion, and LICM, we largely want to work forward across the CFG so
/// that we visit defs before uses and can propagate simplifications from one
/// loop nest into the next. Calls appendReversedLoopsToWorklist with the
/// already reversed loops in LI.
/// FIXME: Consider changing the order in LoopInfo.
void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);

/// Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI,
                LPPassManager *LPM);

/// Add code that checks at runtime if the accessed arrays in \p PointerChecks
/// overlap. Returns the final comparator value or NULL if no check is needed.
Value *
addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
                 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
                 SCEVExpander &Expander);

/// Struct to hold information about a partially invariant condition.
struct IVConditionInfo {
  /// Instructions that need to be duplicated and checked for the unswitching
  /// condition.
  SmallVector<Instruction *> InstToDuplicate;

  /// Constant to indicate for which value the condition is invariant.
  Constant *KnownValue = nullptr;

  /// True if the partially invariant path is no-op (=does not have any
  /// side-effects and no loop value is used outside the loop).
  bool PathIsNoop = true;

  /// If the partially invariant path reaches a single exit block, ExitForPath
  /// is set to that block. Otherwise it is nullptr.
  BasicBlock *ExitForPath = nullptr;
};

/// Check if the loop header has a conditional branch that is not
/// loop-invariant, because it involves load instructions. If all paths from
/// either the true or false successor to the header or loop exits do not
/// modify the memory feeding the condition, perform 'partial unswitching'.
/// That is, duplicate the instructions feeding the condition in the
/// pre-header. Then unswitch on the duplicated condition. The condition is
/// now known in the unswitched version for the 'invariant' path through the
/// original loop.
///
/// If the branch condition of the header is partially invariant, return a pair
/// containing the instructions to duplicate and a boolean Constant to update
/// the condition in the loops created for the true or false successors.
Optional<IVConditionInfo> hasPartialIVCondition(Loop &L, unsigned MSSAThreshold,
                                                MemorySSA &MSSA, AAResults &AA);

} // end namespace llvm

#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H

diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
index fb3a7490346f..7af879638a4d 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
@@ -1,77 +1,82 @@
//===- SimplifyCFGOptions.h - Control structure for SimplifyCFG -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// A set of parameters used to control the transforms in the SimplifyCFG pass.
// Options may change depending on the position in the optimization pipeline.
// For example, canonical form that includes switches and branches may later be
// replaced by lookup tables and selects.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H
#define LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H

namespace llvm {

class AssumptionCache;

struct SimplifyCFGOptions {
  int BonusInstThreshold = 1;
  bool ForwardSwitchCondToPhi = false;
+  bool ConvertSwitchRangeToICmp = false;
  bool ConvertSwitchToLookupTable = false;
  bool NeedCanonicalLoop = true;
  bool HoistCommonInsts = false;
  bool SinkCommonInsts = false;
  bool SimplifyCondBranch = true;
  bool FoldTwoEntryPHINode = true;

  AssumptionCache *AC = nullptr;

  // Support 'builder' pattern to set members by name at construction time.
  SimplifyCFGOptions &bonusInstThreshold(int I) {
    BonusInstThreshold = I;
    return *this;
  }
  SimplifyCFGOptions &forwardSwitchCondToPhi(bool B) {
    ForwardSwitchCondToPhi = B;
    return *this;
  }
+  SimplifyCFGOptions &convertSwitchRangeToICmp(bool B) {
+    ConvertSwitchRangeToICmp = B;
+    return *this;
+  }
  SimplifyCFGOptions &convertSwitchToLookupTable(bool B) {
    ConvertSwitchToLookupTable = B;
    return *this;
  }
  SimplifyCFGOptions &needCanonicalLoops(bool B) {
    NeedCanonicalLoop = B;
    return *this;
  }
  SimplifyCFGOptions &hoistCommonInsts(bool B) {
    HoistCommonInsts = B;
    return *this;
  }
  SimplifyCFGOptions &sinkCommonInsts(bool B) {
    SinkCommonInsts = B;
    return *this;
  }
  SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) {
    AC = Cache;
    return *this;
  }
  SimplifyCFGOptions &setSimplifyCondBranch(bool B) {
    SimplifyCondBranch = B;
    return *this;
  }

  SimplifyCFGOptions &setFoldTwoEntryPHINode(bool B) {
    FoldTwoEntryPHINode = B;
    return *this;
  }
};

} // namespace llvm

#endif // LLVM_TRANSFORMS_UTILS_SIMPLIFYCFGOPTIONS_H
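/// Example: the 'builder' pattern above in use, as a pipeline might configure
/// a late SimplifyCFG run including the new switch-range option (a sketch;
/// the chosen option values are illustrative, not defaults):
/// \code
///   SimplifyCFGOptions Opts = SimplifyCFGOptions()
///                                 .convertSwitchRangeToICmp(true)
///                                 .convertSwitchToLookupTable(true)
///                                 .needCanonicalLoops(false);
/// \endcode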
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d5411d916c77..cd5314e7a17a 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1,3126 +1,3135 @@
//===- InlineCost.cpp - Cost analysis for inliner -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inline cost analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "inline-cost"

STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");

static cl::opt<int>
    DefaultThreshold("inlinedefault-threshold", cl::Hidden, cl::init(225),
                     cl::ZeroOrMore,
                     cl::desc("Default amount of inlining to perform"));

static cl::opt<bool> PrintInstructionComments(
    "print-instruction-comments", cl::Hidden, cl::init(false),
    cl::desc("Prints comments for instruction based on inline cost analysis"));

static cl::opt<int> InlineThreshold(
    "inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
    cl::desc("Control the amount of inlining to perform (default = 225)"));

static cl::opt<int> HintThreshold(
    "inlinehint-threshold", cl::Hidden, cl::init(325), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with inline hint"));

static cl::opt<int>
    ColdCallSiteThreshold("inline-cold-callsite-threshold", cl::Hidden,
                          cl::init(45), cl::ZeroOrMore,
                          cl::desc("Threshold for inlining cold callsites"));

static cl::opt<bool> InlineEnableCostBenefitAnalysis(
    "inline-enable-cost-benefit-analysis", cl::Hidden, cl::init(false),
    cl::desc("Enable the cost-benefit analysis for the inliner"));

static cl::opt<int> InlineSavingsMultiplier(
    "inline-savings-multiplier", cl::Hidden, cl::init(8), cl::ZeroOrMore,
    cl::desc("Multiplier to multiply cycle savings by during inlining"));

static cl::opt<int>
    InlineSizeAllowance("inline-size-allowance", cl::Hidden, cl::init(100),
                        cl::ZeroOrMore,
                        cl::desc("The maximum size of a callee that gets "
                                 "inlined without sufficient cycle savings"));

// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
static cl::opt<int> ColdThreshold(
    "inlinecold-threshold", cl::Hidden, cl::init(45), cl::ZeroOrMore,
    cl::desc("Threshold for inlining functions with cold attribute"));

static cl::opt<int>
    HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
                         cl::ZeroOrMore,
                         cl::desc("Threshold for hot callsites "));

static cl::opt<int> LocallyHotCallSiteThreshold(
    "locally-hot-callsite-threshold", cl::Hidden, cl::init(525), cl::ZeroOrMore,
    cl::desc("Threshold for locally hot callsites "));

static cl::opt<int> ColdCallSiteRelFreq(
    "cold-callsite-rel-freq", cl::Hidden, cl::init(2), cl::ZeroOrMore,
    cl::desc("Maximum block frequency, expressed as a percentage of caller's "
             "entry frequency, for a callsite to be cold in the absence of "
             "profile information."));

static cl::opt<int> HotCallSiteRelFreq(
    "hot-callsite-rel-freq", cl::Hidden, cl::init(60), cl::ZeroOrMore,
    cl::desc("Minimum block frequency, expressed as a multiple of caller's "
             "entry frequency, for a callsite to be hot in the absence of "
             "profile information."));

static cl::opt<int>
    CallPenalty("inline-call-penalty", cl::Hidden, cl::init(25),
                cl::desc("Call penalty that is applied per callsite when inlining"));

static cl::opt<bool> OptComputeFullInlineCost(
    "inline-cost-full", cl::Hidden, cl::init(false), cl::ZeroOrMore,
    cl::desc("Compute the full inline cost of a call site even when the cost "
             "exceeds the threshold."));

static cl::opt<bool> InlineCallerSupersetNoBuiltin(
    "inline-caller-superset-nobuiltin", cl::Hidden, cl::init(true),
    cl::ZeroOrMore,
    cl::desc("Allow inlining when caller has a superset of callee's nobuiltin "
             "attributes."));

static cl::opt<bool> DisableGEPConstOperand(
    "disable-gep-const-evaluation", cl::Hidden, cl::init(false),
    cl::desc("Disables evaluation of GetElementPtr with constant operands"));

namespace {

-class InlineCostCallAnalyzer;
-
/// This function behaves more like CallBase::hasFnAttr: when it looks for the
/// requested attribute, it checks both the call instruction and the called
/// function (if it's available and operand bundles don't prohibit that).
Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
  Attribute CallAttr = CB.getFnAttr(AttrKind);
  if (CallAttr.isValid())
    return CallAttr;

  // Operand bundles override attributes on the called function, but don't
  // override attributes directly present on the call instruction.
  if (!CB.isFnAttrDisallowedByOpBundle(AttrKind))
    if (const Function *F = CB.getCalledFunction())
      return F->getFnAttribute(AttrKind);

  return {};
}
+} // namespace

+namespace llvm {
Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
  Attribute Attr = getFnAttr(CB, AttrKind);
  int AttrValue;
  if (Attr.getValueAsString().getAsInteger(10, AttrValue))
    return None;
  return AttrValue;
}
+} // namespace llvm
+
+namespace {
+class InlineCostCallAnalyzer;

// This struct is used to store information about inline cost of a
// particular instruction
struct InstructionCostDetail {
  int CostBefore = 0;
  int CostAfter = 0;
  int ThresholdBefore = 0;
  int ThresholdAfter = 0;

  int getThresholdDelta() const { return ThresholdAfter - ThresholdBefore; }

  int getCostDelta() const { return CostAfter - CostBefore; }

  bool hasThresholdChanged() const { return ThresholdAfter != ThresholdBefore; }
};

class InlineCostAnnotationWriter : public AssemblyAnnotationWriter {
private:
  InlineCostCallAnalyzer *const ICCA;

public:
  InlineCostAnnotationWriter(InlineCostCallAnalyzer *ICCA) : ICCA(ICCA) {}
  virtual void emitInstructionAnnot(const Instruction *I,
                                    formatted_raw_ostream &OS) override;
};

/// Carry out call site analysis, in order to evaluate inlinability.
/// NOTE: the type is currently used as implementation detail of functions such
/// as llvm::getInlineCost. Note the function_ref constructor parameters - the
/// expectation is that they come from the outer scope, from the wrapper
/// functions. If we want to support constructing CallAnalyzer objects where
/// lambdas are provided inline at construction, or where the object needs to
/// otherwise survive past the scope of the provided functions, we need to
/// revisit the argument types.
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
  typedef InstVisitor<CallAnalyzer, bool> Base;
  friend class InstVisitor<CallAnalyzer, bool>;

protected:
  virtual ~CallAnalyzer() {}

  /// The TargetTransformInfo available for this compilation.
  const TargetTransformInfo &TTI;

  /// Getter for the cache of @llvm.assume intrinsics.
  function_ref<AssumptionCache &(Function &)> GetAssumptionCache;

  /// Getter for BlockFrequencyInfo
  function_ref<BlockFrequencyInfo &(Function &)> GetBFI;

  /// Profile summary information.
  ProfileSummaryInfo *PSI;

  /// The called function.
  Function &F;

  // Cache the DataLayout since we use it a lot.
  const DataLayout &DL;

  /// The OptimizationRemarkEmitter available for this compilation.
  OptimizationRemarkEmitter *ORE;

  /// The candidate callsite being analyzed. Please do not use this to do
  /// analysis in the caller function; we want the inline cost query to be
  /// easily cacheable. Instead, use the cover function paramHasAttr.
  CallBase &CandidateCall;

  /// Extension points for handling callsite features.
  // Called before a basic block was analyzed.
  virtual void onBlockStart(const BasicBlock *BB) {}

  /// Called after a basic block was analyzed.
  virtual void onBlockAnalyzed(const BasicBlock *BB) {}

  /// Called before an instruction was analyzed
  virtual void onInstructionAnalysisStart(const Instruction *I) {}

  /// Called after an instruction was analyzed
  virtual void onInstructionAnalysisFinish(const Instruction *I) {}

  /// Called at the end of the analysis of the callsite. Return the outcome of
  /// the analysis, i.e. 'InlineResult(true)' if the inlining may happen, or
  /// the reason it can't.
  virtual InlineResult finalizeAnalysis() { return InlineResult::success(); }

  /// Called when we're about to start processing a basic block, and every time
  /// we are done processing an instruction. Return true if there is no point in
  /// continuing the analysis (e.g.
  /// we've determined already the call site is too expensive to inline)
  virtual bool shouldStop() { return false; }

  /// Called before the analysis of the callee body starts (with callsite
  /// contexts propagated). It checks callsite-specific information. Return a
  /// reason analysis can't continue if that's the case, or 'true' if it may
  /// continue.
  virtual InlineResult onAnalysisStart() { return InlineResult::success(); }

  /// Called if the analysis engine decides SROA cannot be done for the given
  /// alloca.
  virtual void onDisableSROA(AllocaInst *Arg) {}

  /// Called when the analysis engine determines load elimination won't happen.
  virtual void onDisableLoadElimination() {}

  /// Called when we visit a CallBase, before the analysis starts. Return false
  /// to stop further processing of the instruction.
  virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }

  /// Called to account for a call.
  virtual void onCallPenalty() {}

  /// Called to account for the expectation the inlining would result in a load
  /// elimination.
  virtual void onLoadEliminationOpportunity() {}

  /// Called to account for the cost of argument setup for the Call in the
  /// callee's body (not the callsite currently under analysis).
  virtual void onCallArgumentSetup(const CallBase &Call) {}

  /// Called to account for a load relative intrinsic.
  virtual void onLoadRelativeIntrinsic() {}

  /// Called to account for a lowered call.
  virtual void onLoweredCall(Function *F, CallBase &Call, bool IsIndirectCall) {
  }

  /// Account for a jump table of given size. Return false to stop further
  /// processing of the switch instruction
  virtual bool onJumpTable(unsigned JumpTableSize) { return true; }

  /// Account for a case cluster of given size. Return false to stop further
  /// processing of the instruction.
  virtual bool onCaseCluster(unsigned NumCaseCluster) { return true; }

  /// Called at the end of processing a switch instruction, with the given
  /// number of case clusters.
  virtual void onFinalizeSwitch(unsigned JumpTableSize,
                                unsigned NumCaseCluster) {}

  /// Called to account for any other instruction not specifically accounted
  /// for.
  virtual void onMissedSimplification() {}

  /// Start accounting potential benefits due to SROA for the given alloca.
  virtual void onInitializeSROAArg(AllocaInst *Arg) {}

  /// Account SROA savings for the AllocaInst value.
  virtual void onAggregateSROAUse(AllocaInst *V) {}

  bool handleSROA(Value *V, bool DoNotDisable) {
    // Check for SROA candidates in comparisons.
    if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
      if (DoNotDisable) {
        onAggregateSROAUse(SROAArg);
        return true;
      }
      disableSROAForArg(SROAArg);
    }
    return false;
  }

  bool IsCallerRecursive = false;
  bool IsRecursiveCall = false;
  bool ExposesReturnsTwice = false;
  bool HasDynamicAlloca = false;
  bool ContainsNoDuplicateCall = false;
  bool HasReturn = false;
  bool HasIndirectBr = false;
  bool HasUninlineableIntrinsic = false;
  bool InitsVargArgs = false;

  /// Number of bytes allocated statically by the callee.
  uint64_t AllocatedSize = 0;
  unsigned NumInstructions = 0;
  unsigned NumVectorInstructions = 0;

  /// While we walk the potentially-inlined instructions, we build up and
  /// maintain a mapping of simplified values specific to this callsite. The
  /// idea is to propagate any special information we have about arguments to
  /// this call through the inlinable section of the function, and account for
  /// likely simplifications post-inlining.
  /// The most important aspect we track is CFG-altering simplifications --
  /// when we prove a basic block dead, that can cause dramatic shifts in the
  /// cost of inlining a function.
  DenseMap<Value *, Constant *> SimplifiedValues;

  /// Keep track of the values which map back (through function arguments) to
  /// allocas on the caller stack which could be simplified through SROA.
  DenseMap<Value *, AllocaInst *> SROAArgValues;

  /// Keep track of Allocas for which we believe we may get SROA optimization.
  DenseSet<AllocaInst *> EnabledSROAAllocas;

  /// Keep track of values which map to a pointer base and constant offset.
  DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;

  /// Keep track of dead blocks due to the constant arguments.
  SetVector<BasicBlock *> DeadBlocks;

  /// The mapping of the blocks to their known unique successors due to the
  /// constant arguments.
  DenseMap<BasicBlock *, BasicBlock *> KnownSuccessors;

  /// Model the elimination of repeated loads that is expected to happen
  /// whenever we simplify away the stores that would otherwise cause them to
  /// be loads.
  bool EnableLoadElimination = true;

  /// Whether we allow inlining for recursive call.
  bool AllowRecursiveCall = false;

  SmallPtrSet<Value *, 16> LoadAddrSet;

  AllocaInst *getSROAArgForValueOrNull(Value *V) const {
    auto It = SROAArgValues.find(V);
    if (It == SROAArgValues.end() || EnabledSROAAllocas.count(It->second) == 0)
      return nullptr;
    return It->second;
  }

  // Custom simplification helper routines.
  bool isAllocaDerivedArg(Value *V);
  void disableSROAForArg(AllocaInst *SROAArg);
  void disableSROA(Value *V);
  void findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB);
  void disableLoadElimination();
  bool isGEPFree(GetElementPtrInst &GEP);
  bool canFoldInboundsGEP(GetElementPtrInst &I);
  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
  bool simplifyCallSite(Function *F, CallBase &Call);
  template <typename Callable>
  bool simplifyInstruction(Instruction &I, Callable Evaluate);
  bool simplifyIntrinsicCallIsConstant(CallBase &CB);
  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);

  /// Return true if the given argument to the function being considered for
  /// inlining has the given attribute set either at the call site or the
  /// function declaration. Primarily used to inspect call site specific
  /// attributes since these can be more precise than the ones on the callee
  /// itself.
  bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);

  /// Return true if the given value is known non null within the callee if
  /// inlined through this particular callsite.
  bool isKnownNonNullInCallee(Value *V);

  /// Return true if size growth is allowed when inlining the callee at \p Call.
  bool allowSizeGrowth(CallBase &Call);

  // Custom analysis routines.
  InlineResult analyzeBlock(BasicBlock *BB,
                            SmallPtrSetImpl<const Value *> &EphValues);

  // Disable several entry points to the visitor so we don't accidentally use
  // them by declaring but not defining them here.
  void visit(Module *);
  void visit(Module &);
  void visit(Function *);
  void visit(Function &);
  void visit(BasicBlock *);
  void visit(BasicBlock &);

  // Provide base case for our instruction visit.
  bool visitInstruction(Instruction &I);

  // Our visit overrides.
  bool visitAlloca(AllocaInst &I);
  bool visitPHI(PHINode &I);
  bool visitGetElementPtr(GetElementPtrInst &I);
  bool visitBitCast(BitCastInst &I);
  bool visitPtrToInt(PtrToIntInst &I);
  bool visitIntToPtr(IntToPtrInst &I);
  bool visitCastInst(CastInst &I);
  bool visitCmpInst(CmpInst &I);
  bool visitSub(BinaryOperator &I);
  bool visitBinaryOperator(BinaryOperator &I);
  bool visitFNeg(UnaryOperator &I);
  bool visitLoad(LoadInst &I);
  bool visitStore(StoreInst &I);
  bool visitExtractValue(ExtractValueInst &I);
  bool visitInsertValue(InsertValueInst &I);
  bool visitCallBase(CallBase &Call);
  bool visitReturnInst(ReturnInst &RI);
  bool visitBranchInst(BranchInst &BI);
  bool visitSelectInst(SelectInst &SI);
  bool visitSwitchInst(SwitchInst &SI);
  bool visitIndirectBrInst(IndirectBrInst &IBI);
  bool visitResumeInst(ResumeInst &RI);
  bool visitCleanupReturnInst(CleanupReturnInst &RI);
  bool visitCatchReturnInst(CatchReturnInst &RI);
  bool visitUnreachableInst(UnreachableInst &I);

public:
  CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI,
               function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
               function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
               ProfileSummaryInfo *PSI = nullptr,
               OptimizationRemarkEmitter *ORE = nullptr)
      : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
        PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
        CandidateCall(Call) {}

  InlineResult analyze();

  Optional<Constant *> getSimplifiedValue(Instruction *I) {
    if (SimplifiedValues.find(I) != SimplifiedValues.end())
      return SimplifiedValues[I];
    return None;
  }

  // Keep a bunch of stats about the cost savings found so we can print them
  // out when debugging.
  unsigned NumConstantArgs = 0;
  unsigned NumConstantOffsetPtrArgs = 0;
  unsigned NumAllocaArgs = 0;
  unsigned NumConstantPtrCmps = 0;
  unsigned NumConstantPtrDiffs = 0;
  unsigned NumInstructionsSimplified = 0;

  void dump();
};

// Considering forming a binary search, we should find the number of nodes
// which is the same as the number of comparisons when lowered. For a given
// number of clusters, n, we can define a recursive function, f(n), to find
// the number of nodes in the tree. The recursion is:
//   f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
//   f(n) = n, when n <= 3.
// This will lead to a binary tree where the leaf should be either f(2) or
// f(3) when n > 3. So, the number of comparisons from leaves should be n,
// while the number of non-leaf nodes should be:
//   2^(log2(n) - 1) - 1
//     = 2^log2(n) * 2^-1 - 1
//     = n / 2 - 1.
// Considering comparisons from leaf and non-leaf nodes, we can estimate the
// number of comparisons in a simple closed form:
//   n + n / 2 - 1 = n * 3 / 2 - 1
// For example, n = 8 gives 8 + 8/2 - 1 = 11 expected comparisons.
int64_t getExpectedNumberOfCompare(int NumCaseCluster) {
  return 3 * static_cast<int64_t>(NumCaseCluster) / 2 - 1;
}

/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
class InlineCostCallAnalyzer final : public CallAnalyzer {
  const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;

  const bool ComputeFullInlineCost;

  int LoadEliminationCost = 0;

  /// Bonus to be applied when percentage of vector instructions in callee is
  /// high (see more details in updateThreshold).
  int VectorBonus = 0;

  /// Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus = 0;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  // This DenseMap stores the delta change in cost and threshold after
  // accounting for the given instruction.
/// FIXME: if it is necessary to derive from InlineCostCallAnalyzer, note
/// the FIXME in onLoweredCall, when instantiating an InlineCostCallAnalyzer
class InlineCostCallAnalyzer final : public CallAnalyzer {
  const int CostUpperBound = INT_MAX - InlineConstants::InstrCost - 1;
  const bool ComputeFullInlineCost;
  int LoadEliminationCost = 0;
  /// Bonus to be applied when percentage of vector instructions in callee is
  /// high (see more details in updateThreshold).
  int VectorBonus = 0;
  /// Bonus to be applied when the callee has only one reachable basic block.
  int SingleBBBonus = 0;

  /// Tunable parameters that control the analysis.
  const InlineParams &Params;

  // This DenseMap stores the delta change in cost and threshold after
  // accounting for the given instruction. The map is filled only when the
  // PrintInstructionComments flag is on.
  DenseMap<const Instruction *, InstructionCostDetail> InstructionCostDetailMap;

  /// Upper bound for the inlining cost. Bonuses are being applied to account
  /// for speculative "expected profit" of the inlining decision.
  int Threshold = 0;

  /// Attempt to evaluate indirect calls to boost their inline cost.
  const bool BoostIndirectCalls;

  /// Ignore the threshold when finalizing analysis.
  const bool IgnoreThreshold;

  // True if the cost-benefit-analysis-based inliner is enabled.
  const bool CostBenefitAnalysisEnabled;

  /// Inlining cost measured in abstract units, accounts for all the
  /// instructions expected to be executed for a given function invocation.
  /// Instructions that are statically proven to be dead based on call-site
  /// arguments are not counted here.
  int Cost = 0;

  // The cumulative cost at the beginning of the basic block being analyzed. At
  // the end of analyzing each basic block, "Cost - CostAtBBStart" represents
  // the size of that basic block.
  int CostAtBBStart = 0;

  // The static size of live but cold basic blocks. This is "static" in the
  // sense that it's not weighted by profile counts at all.
  int ColdSize = 0;

  // Whether inlining is decided by cost-threshold analysis.
  bool DecidedByCostThreshold = false;

  // Whether inlining is decided by cost-benefit analysis.
  bool DecidedByCostBenefit = false;

  // The cost-benefit pair computed by cost-benefit analysis.
  Optional<CostBenefitPair> CostBenefit = None;

  bool SingleBB = true;

  unsigned SROACostSavings = 0;
  unsigned SROACostSavingsLost = 0;

  /// The mapping of caller Alloca values to their accumulated cost savings. If
  /// we have to disable SROA for one of the allocas, this tells us how much
  /// cost must be added.
  DenseMap<AllocaInst *, int> SROAArgCosts;

  /// Return true if \p Call is a cold callsite.
  bool isColdCallSite(CallBase &Call, BlockFrequencyInfo *CallerBFI);

  /// Update Threshold based on callsite properties such as callee
  /// attributes and callee hotness for PGO builds. The Callee is explicitly
  /// passed to support analyzing indirect calls whose target is inferred by
  /// analysis.
  void updateThreshold(CallBase &Call, Function &Callee);
  /// Return a higher threshold if \p Call is a hot callsite.
  Optional<int> getHotCallSiteThreshold(CallBase &Call,
                                        BlockFrequencyInfo *CallerBFI);

  /// Handle a capped 'int' increment for Cost.
  void addCost(int64_t Inc, int64_t UpperBound = INT_MAX) {
    assert(UpperBound > 0 && UpperBound <= INT_MAX && "invalid upper bound");
    Cost = std::min<int>(UpperBound, Cost + Inc);
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROAArgCosts.find(Arg);
    if (CostIt == SROAArgCosts.end())
      return;
    addCost(CostIt->second);
    SROACostSavings -= CostIt->second;
    SROACostSavingsLost += CostIt->second;
    SROAArgCosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    addCost(LoadEliminationCost);
    LoadEliminationCost = 0;
  }

  bool onCallBaseVisitStart(CallBase &Call) override {
    if (Optional<int> AttrCallThresholdBonus =
            getStringFnAttrAsInt(Call, "call-threshold-bonus"))
      Threshold += *AttrCallThresholdBonus;

    if (Optional<int> AttrCallCost =
            getStringFnAttrAsInt(Call, "call-inline-cost")) {
      addCost(*AttrCallCost);
      // Prevent further processing of the call since we want to override its
      // inline cost, not just add to it.
      return false;
    }
    return true;
  }

  void onCallPenalty() override { addCost(CallPenalty); }
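  // For illustration (hypothetical IR, not from this patch): a callsite
  // carrying the string attributes "call-inline-cost"="100" and
  // "call-threshold-bonus"="50" makes onCallBaseVisitStart above raise
  // Threshold by 50, add 100 to Cost, and skip the normal per-call modeling
  // by returning false.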
  void onCallArgumentSetup(const CallBase &Call) override {
    // Pay the price of the argument setup. We account for the average 1
    // instruction per call argument setup here.
    addCost(Call.arg_size() * InlineConstants::InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    // This is normally lowered to 4 LLVM instructions.
    addCost(3 * InlineConstants::InstrCost);
  }

  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    // We account for the average 1 instruction per call argument setup here.
    addCost(Call.arg_size() * InlineConstants::InstrCost);

    // If we have a constant that we are calling as a function, we can peer
    // through it and see the function target. This happens not infrequently
    // during devirtualization and so we want to give it a hefty bonus for
    // inlining, but cap that bonus in the event that inlining wouldn't pan
    // out. Pretend to inline the function, with a custom threshold.
    if (IsIndirectCall && BoostIndirectCalls) {
      auto IndirectCallParams = Params;
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;
      /// FIXME: if InlineCostCallAnalyzer is derived from, this may need
      /// to instantiate the derived class.
      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false);
      if (CA.analyze().isSuccess()) {
        // We were able to inline the indirect call! Subtract the cost from the
        // threshold to get the bonus we want to apply, but don't go below
        // zero.
        Cost -= std::max(0, CA.getThreshold() - CA.getCost());
      }
    } else
      // Otherwise simply add the cost for merely making the call.
      addCost(CallPenalty);
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    // If suitable for a jump table, consider the cost for the table size and
    // branch to destination.
    // Maximum valid cost increased in this function.
    if (JumpTableSize) {
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
          4 * InlineConstants::InstrCost;

      addCost(JTCost, static_cast<int64_t>(CostUpperBound));
      return;
    }

    if (NumCaseCluster <= 3) {
      // Suppose a comparison includes one compare and one conditional branch.
      addCost(NumCaseCluster * 2 * InlineConstants::InstrCost);
      return;
    }

    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);
    int64_t SwitchCost =
        ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;

    addCost(SwitchCost, static_cast<int64_t>(CostUpperBound));
  }

  void onMissedSimplification() override {
    addCost(InlineConstants::InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override {
    assert(Arg != nullptr &&
           "Should not initialize SROA costs for null value.");
    SROAArgCosts[Arg] = 0;
  }

  void onAggregateSROAUse(AllocaInst *SROAArg) override {
    auto CostIt = SROAArgCosts.find(SROAArg);
    assert(CostIt != SROAArgCosts.end() &&
           "expected this argument to have a cost");
    CostIt->second += InlineConstants::InstrCost;
    SROACostSavings += InlineConstants::InstrCost;
  }

  void onBlockStart(const BasicBlock *BB) override { CostAtBBStart = Cost; }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (CostBenefitAnalysisEnabled) {
      // Keep track of the static size of live but cold basic blocks. For now,
      // we define a cold basic block to be one that's never executed.
      assert(GetBFI && "GetBFI must be available");
      BlockFrequencyInfo *BFI = &(GetBFI(F));
      assert(BFI && "BFI must be available");
      auto ProfileCount = BFI->getBlockProfileCount(BB);
      assert(ProfileCount.hasValue());
      if (ProfileCount.getValue() == 0)
        ColdSize += Cost - CostAtBBStart;
    }
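    // For example: if analyzing this block raised Cost from 40 to 55 and the
    // block's profile count is 0, the 15 units land in ColdSize and are later
    // subtracted from the size side of the cost-benefit comparison.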
    auto *TI = BB->getTerminator();
    // If we had any successors at this point, then post-inlining is likely to
    // have them as well. Note that we assume any basic blocks which existed
    // due to branches or switches which folded above will also fold after
    // inlining.
    if (SingleBB && TI->getNumSuccessors() > 1) {
      // Take off the bonus we applied to the threshold.
      Threshold -= SingleBBBonus;
      SingleBB = false;
    }
  }

  void onInstructionAnalysisStart(const Instruction *I) override {
    // This function is called to store the initial cost of inlining before
    // the given instruction was assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostBefore = Cost;
    InstructionCostDetailMap[I].ThresholdBefore = Threshold;
  }

  void onInstructionAnalysisFinish(const Instruction *I) override {
    // This function is called to find new values of cost and threshold after
    // the instruction has been assessed.
    if (!PrintInstructionComments)
      return;
    InstructionCostDetailMap[I].CostAfter = Cost;
    InstructionCostDetailMap[I].ThresholdAfter = Threshold;
  }

  bool isCostBenefitAnalysisEnabled() {
    if (!PSI || !PSI->hasProfileSummary())
      return false;

    if (!GetBFI)
      return false;

    if (InlineEnableCostBenefitAnalysis.getNumOccurrences()) {
      // Honor the explicit request from the user.
      if (!InlineEnableCostBenefitAnalysis)
        return false;
    } else {
      // Otherwise, require instrumentation profile.
      if (!PSI->hasInstrumentationProfile())
        return false;
    }

    auto *Caller = CandidateCall.getParent()->getParent();
    if (!Caller->getEntryCount())
      return false;

    BlockFrequencyInfo *CallerBFI = &(GetBFI(*Caller));
    if (!CallerBFI)
      return false;

    // For now, limit to hot call site.
    if (!PSI->isHotCallSite(CandidateCall, CallerBFI))
      return false;

    // Make sure we have a nonzero entry count.
    auto EntryCount = F.getEntryCount();
    if (!EntryCount || !EntryCount->getCount())
      return false;

    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    if (!CalleeBFI)
      return false;

    return true;
  }

  // Determine whether we should inline the given call site, taking into
  // account both the size cost and the cycle savings. Return None if we don't
  // have sufficient profiling information to determine.
  Optional<bool> costBenefitAnalysis() {
    if (!CostBenefitAnalysisEnabled)
      return None;

    // buildInlinerPipeline in the pass builder sets HotCallSiteThreshold to 0
    // for the prelink phase of the AutoFDO + ThinLTO build. Honor the logic
    // by falling back to the cost-based metric.
    // TODO: Improve this hacky condition.
    if (Threshold == 0)
      return None;

    assert(GetBFI);
    BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
    assert(CalleeBFI);

    // The cycle savings expressed as the sum of InlineConstants::InstrCost
    // multiplied by the estimated dynamic count of each instruction we can
    // avoid. Savings come from the call site cost, such as argument setup and
    // the call instruction, as well as the instructions that are folded.
    //
    // We use 128-bit APInt here to avoid potential overflow. This variable
    // should stay well below 10^24 (or 2^80) in practice. This "worst" case
    // assumes that we can avoid or fold a billion instructions, each with a
    // profile count of 10^15 -- roughly the number of cycles for a 24-hour
    // period on a 4GHz machine.
    APInt CycleSavings(128, 0);

    for (auto &BB : F) {
      APInt CurrentSavings(128, 0);
      for (auto &I : BB) {
        if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
          // Count a conditional branch as savings if it becomes
          // unconditional.
          if (BI->isConditional() &&
              isa_and_nonnull<ConstantInt>(
                  SimplifiedValues.lookup(BI->getCondition()))) {
            CurrentSavings += InlineConstants::InstrCost;
          }
        } else if (Value *V = dyn_cast<Value>(&I)) {
          // Count an instruction as savings if we can fold it.
          if (SimplifiedValues.count(V)) {
            CurrentSavings += InlineConstants::InstrCost;
          }
        }
      }

      auto ProfileCount = CalleeBFI->getBlockProfileCount(&BB);
      assert(ProfileCount.hasValue());
      CurrentSavings *= ProfileCount.getValue();
      CycleSavings += CurrentSavings;
    }

    // Compute the cycle savings per call.
    auto EntryProfileCount = F.getEntryCount();
    assert(EntryProfileCount.hasValue() && EntryProfileCount->getCount());
    auto EntryCount = EntryProfileCount->getCount();
    CycleSavings += EntryCount / 2;
    CycleSavings = CycleSavings.udiv(EntryCount);

    // Compute the total savings for the call site.
    auto *CallerBB = CandidateCall.getParent();
    BlockFrequencyInfo *CallerBFI = &(GetBFI(*(CallerBB->getParent())));
    CycleSavings += getCallsiteCost(this->CandidateCall, DL);
    CycleSavings *= CallerBFI->getBlockProfileCount(CallerBB).getValue();

    // Remove the cost of the cold basic blocks.
    int Size = Cost - ColdSize;

    // Allow tiny callees to be inlined regardless of whether they meet the
    // savings threshold.
    Size = Size > InlineSizeAllowance ? Size - InlineSizeAllowance : 1;

    CostBenefit.emplace(APInt(128, Size), CycleSavings);

    // Return true if the savings justify the cost of inlining. Specifically,
    // we evaluate the following inequality:
    //
    //  CycleSavings      PSI->getOrCompHotCountThreshold()
    // -------------- >= -----------------------------------
    //       Size              InlineSavingsMultiplier
    //
    // Note that the left hand side is specific to a call site. The right hand
    // side is a constant for the entire executable.
    APInt LHS = CycleSavings;
    LHS *= InlineSavingsMultiplier;
    APInt RHS(128, PSI->getOrCompHotCountThreshold());
    RHS *= Size;
    return LHS.uge(RHS);
  }
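  // Illustrative numbers (assumed, not from this patch): with CycleSavings =
  // 1000, Size = 100, InlineSavingsMultiplier = 8, and a hot count threshold
  // of 100, we compare 1000 * 8 = 8000 against 100 * 100 = 10000 and reject;
  // halving Size to 50 would flip the decision.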
  InlineResult finalizeAnalysis() override {
    // Loops generally act a lot like calls in that they act like barriers to
    // movement, require a certain amount of setup, etc. So when optimising
    // for size, we penalise any call sites that perform loops. We do this
    // after all other costs here, so will likely only be dealing with
    // relatively small functions (and hence DT and LI will hopefully be
    // cheap).
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      int NumLoops = 0;
      for (Loop *L : LI) {
        // Ignore loops that will not be executed
        if (DeadBlocks.count(L->getHeader()))
          continue;
        NumLoops++;
      }
      addCost(NumLoops * InlineConstants::LoopPenalty);
    }

    // We applied the maximum possible vector bonus at the beginning. Now,
    // subtract the excess bonus, if any, from the Threshold before
    // comparing against Cost.
    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    if (Optional<int> AttrCost =
            getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
      Cost = *AttrCost;

+   if (Optional<int> AttrCostMult = getStringFnAttrAsInt(
+           CandidateCall,
+           InlineConstants::FunctionInlineCostMultiplierAttributeName))
+     Cost *= *AttrCostMult;
+
    if (Optional<int> AttrThreshold =
            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
      Threshold = *AttrThreshold;

    if (auto Result = costBenefitAnalysis()) {
      DecidedByCostBenefit = true;
      if (Result.getValue())
        return InlineResult::success();
      else
        return InlineResult::failure("Cost over threshold.");
    }

    if (IgnoreThreshold)
      return InlineResult::success();

    DecidedByCostThreshold = true;
    return Cost < std::max(1, Threshold)
               ? InlineResult::success()
               : InlineResult::failure("Cost over threshold.");
  }

  bool shouldStop() override {
    if (IgnoreThreshold || ComputeFullInlineCost)
      return false;
    // Bail out the moment we cross the threshold. This means we'll
    // under-count the cost, but only when undercounting doesn't matter.
    if (Cost < Threshold)
      return false;
    DecidedByCostThreshold = true;
    return true;
  }

  void onLoadEliminationOpportunity() override {
    LoadEliminationCost += InlineConstants::InstrCost;
  }

  InlineResult onAnalysisStart() override {
    // Perform some tweaks to the cost and threshold based on the direct
    // callsite information.

    // We want to more aggressively inline vector-dense kernels, so up the
    // threshold, and we'll lower it if the % of vector instructions gets too
    // low. Note that these bonuses are somewhat arbitrary and evolved over
    // time by accident as much as because they are principled bonuses.
    //
    // FIXME: It would be nice to remove all such bonuses. At least it would
    // be nice to base the bonus values on something more scientific.
    assert(NumInstructions == 0);
    assert(NumVectorInstructions == 0);

    // Update the threshold based on callsite properties
    updateThreshold(CandidateCall, F);

    // While Threshold depends on commandline options that can take negative
    // values, we want to enforce the invariant that the computed threshold
    // and bonuses are non-negative.
    assert(Threshold >= 0);
    assert(SingleBBBonus >= 0);
    assert(VectorBonus >= 0);

    // Speculatively apply all possible bonuses to Threshold. If cost exceeds
    // this Threshold any time, and cost cannot decrease, we can stop
    // processing the rest of the function body.
    Threshold += (SingleBBBonus + VectorBonus);

    // Give out bonuses for the callsite, as the instructions setting them up
    // will be gone after inlining.
    addCost(-getCallsiteCost(this->CandidateCall, DL));

    // If this function uses the coldcc calling convention, prefer not to
    // inline it.
    if (F.getCallingConv() == CallingConv::Cold)
      Cost += InlineConstants::ColdccPenalty;

    // Check if we're done. This can happen due to bonuses and penalties.
    if (Cost >= Threshold && !ComputeFullInlineCost)
      return InlineResult::failure("high cost");
    return InlineResult::success();
  }

public:
  InlineCostCallAnalyzer(
      Function &Callee, CallBase &Call, const InlineParams &Params,
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI = nullptr,
      ProfileSummaryInfo *PSI = nullptr,
      OptimizationRemarkEmitter *ORE = nullptr, bool BoostIndirect = true,
      bool IgnoreThreshold = false)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI, ORE),
        ComputeFullInlineCost(OptComputeFullInlineCost ||
                              Params.ComputeFullInlineCost || ORE ||
                              isCostBenefitAnalysisEnabled()),
        Params(Params), Threshold(Params.DefaultThreshold),
        BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
        CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
        Writer(this) {
    AllowRecursiveCall = Params.AllowRecursiveCall.getValue();
  }

  /// Annotation Writer for instruction details
  InlineCostAnnotationWriter Writer;

  void dump();
  // Prints the same analysis as dump(), but its definition is not dependent
  // on the build.
  void print(raw_ostream &OS);

  Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
    if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
      return InstructionCostDetailMap[I];
    return None;
  }

  virtual ~InlineCostCallAnalyzer() {}
  int getThreshold() const { return Threshold; }
  int getCost() const { return Cost; }
  Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
  bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
  bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
};

class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
private:
  InlineCostFeatures Cost = {};

  // FIXME: These constants are taken from the heuristic-based cost visitor.
  // These should be removed entirely in a later revision to avoid reliance on
  // heuristics in the ML inliner.
  static constexpr int JTCostMultiplier = 4;
  static constexpr int CaseClusterCostMultiplier = 2;
  static constexpr int SwitchCostMultiplier = 2;

  // FIXME: These are taken from the heuristic-based cost visitor: we should
  // eventually abstract these to the CallAnalyzer to avoid duplication.
  unsigned SROACostSavingOpportunities = 0;
  int VectorBonus = 0;
  int SingleBBBonus = 0;
  int Threshold = 5;

  DenseMap<AllocaInst *, unsigned> SROACosts;

  void increment(InlineCostFeatureIndex Feature, int64_t Delta = 1) {
    Cost[static_cast<size_t>(Feature)] += Delta;
  }

  void set(InlineCostFeatureIndex Feature, int64_t Value) {
    Cost[static_cast<size_t>(Feature)] = Value;
  }

  void onDisableSROA(AllocaInst *Arg) override {
    auto CostIt = SROACosts.find(Arg);
    if (CostIt == SROACosts.end())
      return;

    increment(InlineCostFeatureIndex::SROALosses, CostIt->second);
    SROACostSavingOpportunities -= CostIt->second;
    SROACosts.erase(CostIt);
  }

  void onDisableLoadElimination() override {
    set(InlineCostFeatureIndex::LoadElimination, 1);
  }

  void onCallPenalty() override {
    increment(InlineCostFeatureIndex::CallPenalty, CallPenalty);
  }

  void onCallArgumentSetup(const CallBase &Call) override {
    increment(InlineCostFeatureIndex::CallArgumentSetup,
              Call.arg_size() * InlineConstants::InstrCost);
  }

  void onLoadRelativeIntrinsic() override {
    increment(InlineCostFeatureIndex::LoadRelativeIntrinsic,
              3 * InlineConstants::InstrCost);
  }

  void onLoweredCall(Function *F, CallBase &Call,
                     bool IsIndirectCall) override {
    increment(InlineCostFeatureIndex::LoweredCallArgSetup,
              Call.arg_size() * InlineConstants::InstrCost);

    if (IsIndirectCall) {
      InlineParams IndirectCallParams = {/* DefaultThreshold*/ 0,
                                         /*HintThreshold*/ {},
                                         /*ColdThreshold*/ {},
                                         /*OptSizeThreshold*/ {},
                                         /*OptMinSizeThreshold*/ {},
                                         /*HotCallSiteThreshold*/ {},
                                         /*LocallyHotCallSiteThreshold*/ {},
                                         /*ColdCallSiteThreshold*/ {},
                                         /*ComputeFullInlineCost*/ true,
                                         /*EnableDeferral*/ true};
      IndirectCallParams.DefaultThreshold =
          InlineConstants::IndirectCallThreshold;

      InlineCostCallAnalyzer CA(*F, Call, IndirectCallParams, TTI,
                                GetAssumptionCache, GetBFI, PSI, ORE, false,
                                true);
      if (CA.analyze().isSuccess()) {
        increment(InlineCostFeatureIndex::NestedInlineCostEstimate,
                  CA.getCost());
        increment(InlineCostFeatureIndex::NestedInlines, 1);
      }
    } else {
      onCallPenalty();
    }
  }

  void onFinalizeSwitch(unsigned JumpTableSize,
                        unsigned NumCaseCluster) override {
    if (JumpTableSize) {
      int64_t JTCost =
          static_cast<int64_t>(JumpTableSize) * InlineConstants::InstrCost +
          JTCostMultiplier * InlineConstants::InstrCost;
      increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
      return;
    }

    if (NumCaseCluster <= 3) {
      increment(InlineCostFeatureIndex::CaseClusterPenalty,
                NumCaseCluster * CaseClusterCostMultiplier *
                    InlineConstants::InstrCost);
      return;
    }
    int64_t ExpectedNumberOfCompare =
        getExpectedNumberOfCompare(NumCaseCluster);

    int64_t SwitchCost = ExpectedNumberOfCompare * SwitchCostMultiplier *
                         InlineConstants::InstrCost;
    increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
  }

  void onMissedSimplification() override {
    increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
              InlineConstants::InstrCost);
  }

  void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; }

  void onAggregateSROAUse(AllocaInst *Arg) override {
    SROACosts.find(Arg)->second += InlineConstants::InstrCost;
    SROACostSavingOpportunities += InlineConstants::InstrCost;
  }

  void onBlockAnalyzed(const BasicBlock *BB) override {
    if (BB->getTerminator()->getNumSuccessors() > 1)
      set(InlineCostFeatureIndex::IsMultipleBlocks, 1);
    Threshold -= SingleBBBonus;
  }

  InlineResult finalizeAnalysis() override {
    auto *Caller = CandidateCall.getFunction();
    if (Caller->hasMinSize()) {
      DominatorTree DT(F);
      LoopInfo LI(DT);
      for (Loop *L : LI) {
        // Ignore loops that will not be executed
        if (DeadBlocks.count(L->getHeader()))
          continue;
        increment(InlineCostFeatureIndex::NumLoops,
                  InlineConstants::LoopPenalty);
      }
    }
    set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size());
    set(InlineCostFeatureIndex::SimplifiedInstructions,
        NumInstructionsSimplified);
    set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
    set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
        NumConstantOffsetPtrArgs);
    set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);

    if (NumVectorInstructions <= NumInstructions / 10)
      Threshold -= VectorBonus;
    else if (NumVectorInstructions <= NumInstructions / 2)
      Threshold -= VectorBonus / 2;

    set(InlineCostFeatureIndex::Threshold, Threshold);

    return InlineResult::success();
  }

  bool shouldStop() override { return false; }

  void onLoadEliminationOpportunity() override {
    increment(InlineCostFeatureIndex::LoadElimination, 1);
  }

  InlineResult onAnalysisStart() override {
    increment(InlineCostFeatureIndex::CallSiteCost,
              -1 * getCallsiteCost(this->CandidateCall, DL));

    set(InlineCostFeatureIndex::ColdCcPenalty,
        (F.getCallingConv() == CallingConv::Cold));

    // FIXME: we shouldn't repeat this logic in both the Features and Cost
    // analyzer - instead, we should abstract it to a common method in the
    // CallAnalyzer
    int SingleBBBonusPercent = 50;
    int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
    Threshold += TTI.adjustInliningThreshold(&CandidateCall);
    Threshold *= TTI.getInliningThresholdMultiplier();
    SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
    VectorBonus = Threshold * VectorBonusPercent / 100;
    Threshold += (SingleBBBonus + VectorBonus);

    return InlineResult::success();
  }

public:
  InlineCostFeaturesAnalyzer(
      const TargetTransformInfo &TTI,
      function_ref<AssumptionCache &(Function &)> &GetAssumptionCache,
      function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
      ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE,
      Function &Callee, CallBase &Call)
      : CallAnalyzer(Callee, Call, TTI, GetAssumptionCache, GetBFI, PSI) {}

  const InlineCostFeatures &features() const { return Cost; }
};

} // namespace

/// Test whether the given value is an Alloca-derived function argument.
bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
  return SROAArgValues.count(V);
}

void CallAnalyzer::disableSROAForArg(AllocaInst *SROAArg) {
  onDisableSROA(SROAArg);
  EnabledSROAAllocas.erase(SROAArg);
  disableLoadElimination();
}
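// For illustration, an annotation emitted below might read (values assumed):
//   ; cost before = 25, cost after = 30, threshold before = 225, threshold after = 225, cost delta = 5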
void InlineCostAnnotationWriter::emitInstructionAnnot(
    const Instruction *I, formatted_raw_ostream &OS) {
  // The cost of inlining of the given instruction is always printed.
  // The threshold delta is printed only when it is non-zero. It happens
  // when we decided to give a bonus at a particular instruction.
  Optional<InstructionCostDetail> Record = ICCA->getCostDetails(I);
  if (!Record)
    OS << "; No analysis for the instruction";
  else {
    OS << "; cost before = " << Record->CostBefore
       << ", cost after = " << Record->CostAfter
       << ", threshold before = " << Record->ThresholdBefore
       << ", threshold after = " << Record->ThresholdAfter << ", ";
    OS << "cost delta = " << Record->getCostDelta();
    if (Record->hasThresholdChanged())
      OS << ", threshold delta = " << Record->getThresholdDelta();
  }
  auto C = ICCA->getSimplifiedValue(const_cast<Instruction *>(I));
  if (C) {
    OS << ", simplified to ";
    C.getValue()->print(OS, true);
  }
  OS << "\n";
}

/// If 'V' maps to a SROA candidate, disable SROA for it.
void CallAnalyzer::disableSROA(Value *V) {
  if (auto *SROAArg = getSROAArgForValueOrNull(V)) {
    disableSROAForArg(SROAArg);
  }
}

void CallAnalyzer::disableLoadElimination() {
  if (EnableLoadElimination) {
    onDisableLoadElimination();
    EnableLoadElimination = false;
  }
}

/// Accumulate a constant GEP offset into an APInt if possible.
///
/// Returns false if unable to compute the offset for any reason. Respects any
/// simplified values known during the analysis of this callsite.
bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
  unsigned IntPtrWidth = DL.getIndexTypeSizeInBits(GEP.getType());
  assert(IntPtrWidth == Offset.getBitWidth());

  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
        OpC = dyn_cast<ConstantInt>(SimpleOp);
    if (!OpC)
      return false;
    if (OpC->isZero())
      continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      unsigned ElementIdx = OpC->getZExtValue();
      const StructLayout *SL = DL.getStructLayout(STy);
      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
      continue;
    }

    APInt TypeSize(IntPtrWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
  }
  return true;
}

/// Use TTI to check whether a GEP is free.
///
/// Respects any simplified values known during the analysis of this callsite.
bool CallAnalyzer::isGEPFree(GetElementPtrInst &GEP) {
  SmallVector<Value *, 4> Operands;
  Operands.push_back(GEP.getOperand(0));
  for (const Use &Op : GEP.indices())
    if (Constant *SimpleOp = SimplifiedValues.lookup(Op))
      Operands.push_back(SimpleOp);
    else
      Operands.push_back(Op);
  return TTI.getUserCost(&GEP, Operands,
                         TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}
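// Worked example for the GEP offset logic above (illustrative only): for
//   %p = getelementptr inbounds {i32, i64}, ptr %base, i64 1, i32 1
// with the usual layout ({i32, i64} occupies 16 bytes and the i64 field sits
// at offset 8), accumulateGEPOffset adds 1 * 16 + 8, so Offset grows by 24.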
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
  disableSROA(I.getOperand(0));

  // Check whether inlining will turn a dynamic alloca into a static
  // alloca and handle that case.
  if (I.isArrayAllocation()) {
    Constant *Size = SimplifiedValues.lookup(I.getArraySize());
    if (auto *AllocSize = dyn_cast_or_null<ConstantInt>(Size)) {
      // Sometimes a dynamic alloca could be converted into a static alloca
      // after this constant prop, and become a huge static alloca on an
      // unconditional CFG path. Avoid inlining if this is going to happen
      // above a threshold.
      // FIXME: If the threshold is removed or lowered too much, we could end
      // up being too pessimistic and prevent inlining non-problematic code.
      // This could result in unintended perf regressions. A better overall
      // strategy is needed to track stack usage during inlining.
      Type *Ty = I.getAllocatedType();
      AllocatedSize = SaturatingMultiplyAdd(
          AllocSize->getLimitedValue(),
          DL.getTypeAllocSize(Ty).getKnownMinSize(), AllocatedSize);
      if (AllocatedSize > InlineConstants::MaxSimplifiedDynamicAllocaToInline)
        HasDynamicAlloca = true;
      return false;
    }
  }

  // Accumulate the allocated size.
  if (I.isStaticAlloca()) {
    Type *Ty = I.getAllocatedType();
    AllocatedSize = SaturatingAdd(DL.getTypeAllocSize(Ty).getKnownMinSize(),
                                  AllocatedSize);
  }

  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
  // a variety of reasons, and so we would like to not inline them into
  // functions which don't currently have a dynamic alloca. This simply
  // disables inlining altogether in the presence of a dynamic alloca.
  if (!I.isStaticAlloca())
    HasDynamicAlloca = true;

  return false;
}

bool CallAnalyzer::visitPHI(PHINode &I) {
  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
  // though we don't want to propagate its bonuses. The idea is to disable
  // SROA if it *might* be used in an inappropriate manner.

  // Phi nodes are always zero-cost.
  // FIXME: Pointer sizes may differ between different address spaces, so do
  // we need to use correct address space in the call to getPointerSizeInBits
  // here? Or could we skip the getPointerSizeInBits call completely? As far
  // as I can see the ZeroOffset is used as a dummy value, so we can probably
  // use any bit width for the ZeroOffset?
  APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
  bool CheckSROA = I.getType()->isPointerTy();

  // Track the constant or pointer with constant offset we've seen so far.
  Constant *FirstC = nullptr;
  std::pair<Value *, APInt> FirstBaseAndOffset = {nullptr, ZeroOffset};
  Value *FirstV = nullptr;

  for (unsigned i = 0, e = I.getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = I.getIncomingBlock(i);
    // If the incoming block is dead, skip the incoming block.
    if (DeadBlocks.count(Pred))
      continue;
    // If the parent block of phi is not the known successor of the incoming
    // block, skip the incoming block.
    BasicBlock *KnownSuccessor = KnownSuccessors[Pred];
    if (KnownSuccessor && KnownSuccessor != I.getParent())
      continue;

    Value *V = I.getIncomingValue(i);
    // If the incoming value is this phi itself, skip the incoming value.
    if (&I == V)
      continue;

    Constant *C = dyn_cast<Constant>(V);
    if (!C)
      C = SimplifiedValues.lookup(V);

    std::pair<Value *, APInt> BaseAndOffset = {nullptr, ZeroOffset};
    if (!C && CheckSROA)
      BaseAndOffset = ConstantOffsetPtrs.lookup(V);

    if (!C && !BaseAndOffset.first)
      // The incoming value is neither a constant nor a pointer with constant
      // offset, exit early.
      return true;

    if (FirstC) {
      if (FirstC == C)
        // If we've seen a constant incoming value before and it is the same
        // constant we see this time, continue checking the next incoming
        // value.
        continue;
      // Otherwise early exit because we either see a different constant or
      // saw a constant before but we have a pointer with constant offset this
      // time.
      return true;
    }

    if (FirstV) {
      // The same logic as above, but check pointer with constant offset here.
      if (FirstBaseAndOffset == BaseAndOffset)
        continue;
      return true;
    }

    if (C) {
      // This is the 1st time we've seen a constant, record it.
      FirstC = C;
      continue;
    }

    // The remaining case is that this is the 1st time we've seen a pointer
    // with constant offset, record it.
    FirstV = V;
    FirstBaseAndOffset = BaseAndOffset;
  }

  // Check if we can map phi to a constant.
  if (FirstC) {
    SimplifiedValues[&I] = FirstC;
    return true;
  }

  // Check if we can map phi to a pointer with constant offset.
  if (FirstBaseAndOffset.first) {
    ConstantOffsetPtrs[&I] = FirstBaseAndOffset;

    if (auto *SROAArg = getSROAArgForValueOrNull(FirstV))
      SROAArgValues[&I] = SROAArg;
  }

  return true;
}

/// Check we can fold GEPs of constant-offset call site argument pointers.
/// This requires target data and inbounds GEPs.
///
/// \return true if the specified GEP can be folded.
bool CallAnalyzer::canFoldInboundsGEP(GetElementPtrInst &I) {
  // Check if we have a base + offset for the pointer.
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getPointerOperand());
  if (!BaseAndOffset.first)
    return false;

  // Check if the offset of this GEP is constant, and if so accumulate it
  // into Offset.
  if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second))
    return false;

  // Add the result as a new mapping to Base + Offset.
  ConstantOffsetPtrs[&I] = BaseAndOffset;

  return true;
}

bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
  auto *SROAArg = getSROAArgForValueOrNull(I.getPointerOperand());

  // Lambda to check whether a GEP's indices are all constant.
  auto IsGEPOffsetConstant = [&](GetElementPtrInst &GEP) {
    for (const Use &Op : GEP.indices())
      if (!isa<Constant>(Op) && !SimplifiedValues.lookup(Op))
        return false;
    return true;
  };

  if (!DisableGEPConstOperand)
    if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
          SmallVector<Constant *, 2> Indices;
          for (unsigned int Index = 1; Index < COps.size(); ++Index)
            Indices.push_back(COps[Index]);
          return ConstantExpr::getGetElementPtr(
              I.getSourceElementType(), COps[0], Indices, I.isInBounds());
        }))
      return true;

  if ((I.isInBounds() && canFoldInboundsGEP(I)) || IsGEPOffsetConstant(I)) {
    if (SROAArg)
      SROAArgValues[&I] = SROAArg;

    // Constant GEPs are modeled as free.
    return true;
  }

  // Variable GEPs will require math and will disable SROA.
  if (SROAArg)
    disableSROAForArg(SROAArg);
  return isGEPFree(I);
}

/// Simplify \p I if its operands are constants and update SimplifiedValues.
/// \p Evaluate is a callable specific to instruction type that evaluates the
/// instruction when all the operands are constants.
template <typename Callable>
bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
  SmallVector<Constant *, 2> COps;
  for (Value *Op : I.operands()) {
    Constant *COp = dyn_cast<Constant>(Op);
    if (!COp)
      COp = SimplifiedValues.lookup(Op);
    if (!COp)
      return false;
    COps.push_back(COp);
  }
  auto *C = Evaluate(COps);
  if (!C)
    return false;
  SimplifiedValues[&I] = C;
  return true;
}

/// Try to simplify a call to llvm.is.constant.
///
/// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
/// we expect calls of this specific intrinsic to be infrequent.
///
/// FIXME: Given that we know CB's parent (F) caller
/// (CandidateCall->getParent()->getParent()), we might be able to determine
/// whether inlining F into F's caller would change how the call to
/// llvm.is.constant would evaluate.
bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
  Value *Arg = CB.getArgOperand(0);
  auto *C = dyn_cast<Constant>(Arg);

  if (!C)
    C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));

  Type *RT = CB.getFunctionType()->getReturnType();
  SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
  return true;
}
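// For illustration (assumed state, not from this patch): if %x was previously
// mapped to the constant 7, then for `%c = icmp eq i32 %x, 7` the Evaluate
// callable passed to simplifyInstruction receives COps = {7, 7}, folds the
// compare, and records the result so a later branch on %c can be treated as
// free.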
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
  // Propagate constants through bitcasts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getBitCast(COps[0], I.getType());
      }))
    return true;

  // Track base/offsets through casts
  std::pair<Value *, APInt> BaseAndOffset =
      ConstantOffsetPtrs.lookup(I.getOperand(0));
  // Casts don't change the offset, just wrap it up.
  if (BaseAndOffset.first)
    ConstantOffsetPtrs[&I] = BaseAndOffset;

  // Also look for SROA candidates here.
  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
    SROAArgValues[&I] = SROAArg;

  // Bitcasts are always zero cost.
  return true;
}

bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
  // Propagate constants through ptrtoint.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getPtrToInt(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when converted to a plain integer provided the
  // integer is large enough to represent the pointer.
  unsigned IntegerSize = I.getType()->getScalarSizeInBits();
  unsigned AS = I.getOperand(0)->getType()->getPointerAddressSpace();
  if (IntegerSize == DL.getPointerSizeInBits(AS)) {
    std::pair<Value *, APInt> BaseAndOffset =
        ConstantOffsetPtrs.lookup(I.getOperand(0));
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // This is really weird. Technically, ptrtoint will disable SROA. However,
  // unless that ptrtoint is *used* somewhere in the live basic blocks after
  // inlining, it will be nuked, and SROA should proceed. All of the uses
  // which would block SROA would also block SROA if applied directly to a
  // pointer, and so we can just add the integer in here. The only places
  // where SROA is preserved either cannot fire on an integer, or won't
  // in-and-of themselves disable SROA (ext) w/o some later use that we would
  // see and disable.
  if (auto *SROAArg = getSROAArgForValueOrNull(I.getOperand(0)))
    SROAArgValues[&I] = SROAArg;

  return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}

bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
  // Propagate constants through inttoptr.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getIntToPtr(COps[0], I.getType());
      }))
    return true;

  // Track base/offset pairs when round-tripped through a pointer without
  // modifications provided the integer is not too large.
  Value *Op = I.getOperand(0);
  unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
  if (IntegerSize <= DL.getPointerTypeSizeInBits(I.getType())) {
    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
    if (BaseAndOffset.first)
      ConstantOffsetPtrs[&I] = BaseAndOffset;
  }

  // "Propagate" SROA here in the same manner as we do for ptrtoint above.
  if (auto *SROAArg = getSROAArgForValueOrNull(Op))
    SROAArgValues[&I] = SROAArg;

  return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}
bool CallAnalyzer::visitCastInst(CastInst &I) {
  // Propagate constants through casts.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
      }))
    return true;

  // Disable SROA in the face of arbitrary casts we don't explicitly list
  // elsewhere.
  disableSROA(I.getOperand(0));

  // If this is a floating-point cast, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such.
  switch (I.getOpcode()) {
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::UIToFP:
  case Instruction::SIToFP:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive)
      onCallPenalty();
    break;
  default:
    break;
  }

  return TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) ==
         TargetTransformInfo::TCC_Free;
}

bool CallAnalyzer::paramHasAttr(Argument *A, Attribute::AttrKind Attr) {
  return CandidateCall.paramHasAttr(A->getArgNo(), Attr);
}

bool CallAnalyzer::isKnownNonNullInCallee(Value *V) {
  // Does the *call site* have the NonNull attribute set on an argument? We
  // use the attribute on the call site to memoize any analysis done in the
  // caller. This will also trip if the callee function has a non-null
  // parameter attribute, but that's a less interesting case because hopefully
  // the callee would already have been simplified based on that.
  if (Argument *A = dyn_cast<Argument>(V))
    if (paramHasAttr(A, Attribute::NonNull))
      return true;

  // Is this an alloca in the caller? This is distinct from the attribute case
  // above because attributes aren't updated within the inliner itself and we
  // always want to catch the alloca derived case.
  if (isAllocaDerivedArg(V))
    // We can actually predict the result of comparisons between an
    // alloca-derived value and null. Note that this fires regardless of
    // SROA firing.
    return true;

  return false;
}

bool CallAnalyzer::allowSizeGrowth(CallBase &Call) {
  // If the normal destination of the invoke or the parent block of the call
  // site is unreachable-terminated, there is little point in inlining this
  // unless there is literally zero cost.
  // FIXME: Note that it is possible that an unreachable-terminated block has
  // a hot entry. For example, in below scenario inlining hot_call_X() may be
  // beneficial :
  // main() {
  //   hot_call_1();
  //   ...
  //   hot_call_N()
  //   exit(0);
  // }
  // For now, we are not handling this corner case here as it is rare in real
  // code. In future, we should elaborate this based on BPI and BFI in more
  // general threshold adjusting heuristics in updateThreshold().
  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
    if (isa<UnreachableInst>(II->getNormalDest()->getTerminator()))
      return false;
  } else if (isa<UnreachableInst>(Call.getParent()->getTerminator()))
    return false;

  return true;
}

bool InlineCostCallAnalyzer::isColdCallSite(CallBase &Call,
                                            BlockFrequencyInfo *CallerBFI) {
  // If global profile summary is available, then callsite's coldness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary())
    return PSI->isColdCallSite(Call, CallerBFI);

  // Otherwise we need BFI to be available.
  if (!CallerBFI)
    return false;

  // Determine if the callsite is cold relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the
  // added complexity is not worth it unless this scaling shows up high in
  // the profiles.
  const BranchProbability ColdProb(ColdCallSiteRelFreq, 100);
  auto CallSiteBB = Call.getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB);
  auto CallerEntryFreq =
      CallerBFI->getBlockFreq(&(Call.getCaller()->getEntryBlock()));
  return CallSiteFreq < CallerEntryFreq * ColdProb;
}
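// For illustration (assuming the default ColdCallSiteRelFreq of 2): when no
// profile summary is available, a callsite whose block frequency is below 2%
// of the caller's entry frequency is considered cold by the check above.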
Optional<int>
InlineCostCallAnalyzer::getHotCallSiteThreshold(CallBase &Call,
                                                BlockFrequencyInfo *CallerBFI) {
  // If global profile summary is available, then callsite's hotness is
  // determined based on that.
  if (PSI && PSI->hasProfileSummary() && PSI->isHotCallSite(Call, CallerBFI))
    return Params.HotCallSiteThreshold;

  // Otherwise we need BFI to be available and to have a locally hot callsite
  // threshold.
  if (!CallerBFI || !Params.LocallyHotCallSiteThreshold)
    return None;

  // Determine if the callsite is hot relative to caller's entry. We could
  // potentially cache the computation of scaled entry frequency, but the
  // added complexity is not worth it unless this scaling shows up high in
  // the profiles.
  auto CallSiteBB = Call.getParent();
  auto CallSiteFreq = CallerBFI->getBlockFreq(CallSiteBB).getFrequency();
  auto CallerEntryFreq = CallerBFI->getEntryFreq();
  if (CallSiteFreq >= CallerEntryFreq * HotCallSiteRelFreq)
    return Params.LocallyHotCallSiteThreshold;

  // Otherwise treat it normally.
  return None;
}

void InlineCostCallAnalyzer::updateThreshold(CallBase &Call,
                                             Function &Callee) {
  // If no size growth is allowed for this inlining, set Threshold to 0.
  if (!allowSizeGrowth(Call)) {
    Threshold = 0;
    return;
  }

  Function *Caller = Call.getCaller();

  // return min(A, B) if B is valid.
  auto MinIfValid = [](int A, Optional<int> B) {
    return B ? std::min(A, B.getValue()) : A;
  };

  // return max(A, B) if B is valid.
  auto MaxIfValid = [](int A, Optional<int> B) {
    return B ? std::max(A, B.getValue()) : A;
  };

  // Various bonus percentages. These are multiplied by Threshold to get the
  // bonus values.
  // SingleBBBonus: This bonus is applied if the callee has a single reachable
  // basic block at the given callsite context. This is speculatively applied
  // and withdrawn if more than one basic block is seen.
  //
  // LastCallToStaticBonus: This large bonus is applied to ensure the inlining
  // of the last call to a static function as inlining such functions is
  // guaranteed to reduce code size.
  //
  // These bonus percentages may be set to 0 based on properties of the caller
  // and the callsite.
  int SingleBBBonusPercent = 50;
  int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;

  // Lambda to set all the above bonus and bonus percentages to 0.
  auto DisallowAllBonuses = [&]() {
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
    LastCallToStaticBonus = 0;
  };

  // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are
  // available and reduce the threshold if the caller has the necessary
  // attribute.
  if (Caller->hasMinSize()) {
    Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
    // For minsize, we want to disable the single BB bonus and the vector
    // bonuses, but not the last-call-to-static bonus. Inlining the last call
    // to a static function will, at the minimum, eliminate the parameter
    // setup and call/return instructions.
    SingleBBBonusPercent = 0;
    VectorBonusPercent = 0;
  } else if (Caller->hasOptSize())
    Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);

  // Adjust the threshold based on inlinehint attribute and profile based
  // hotness information if the caller does not have MinSize attribute.
  if (!Caller->hasMinSize()) {
    if (Callee.hasFnAttribute(Attribute::InlineHint))
      Threshold = MaxIfValid(Threshold, Params.HintThreshold);

    // FIXME: After switching to the new passmanager, simplify the logic below
    // by checking only the callsite hotness/coldness as we will reliably
    // have local profile information.
    //
    // Callsite hotness and coldness can be determined if sample profile is
    // used (which adds hotness metadata to calls) or if caller's
    // BlockFrequencyInfo is available.
    BlockFrequencyInfo *CallerBFI = GetBFI ? &(GetBFI(*Caller)) : nullptr;
    auto HotCallSiteThreshold = getHotCallSiteThreshold(Call, CallerBFI);
    if (!Caller->hasOptSize() && HotCallSiteThreshold) {
      LLVM_DEBUG(dbgs() << "Hot callsite.\n");
      // FIXME: This should update the threshold only if it exceeds the
      // current threshold, but AutoFDO + ThinLTO currently relies on this
      // behavior to prevent inlining of hot callsites during ThinLTO
      // compile phase.
      Threshold = HotCallSiteThreshold.getValue();
    } else if (isColdCallSite(Call, CallerBFI)) {
      LLVM_DEBUG(dbgs() << "Cold callsite.\n");
      // Do not apply bonuses for a cold callsite including the
      // LastCallToStatic bonus. While this bonus might result in code size
      // reduction, it can cause the size of a non-cold caller to increase
      // preventing it from being inlined.
      DisallowAllBonuses();
      Threshold = MinIfValid(Threshold, Params.ColdCallSiteThreshold);
    } else if (PSI) {
      // Use callee's global profile information only if we have no way of
      // determining this via callsite information.
      if (PSI->isFunctionEntryHot(&Callee)) {
        LLVM_DEBUG(dbgs() << "Hot callee.\n");
        // If callsite hotness can not be determined, we may still know
        // that the callee is hot and treat it as a weaker hint for threshold
        // increase.
        Threshold = MaxIfValid(Threshold, Params.HintThreshold);
      } else if (PSI->isFunctionEntryCold(&Callee)) {
        LLVM_DEBUG(dbgs() << "Cold callee.\n");
        // Do not apply bonuses for a cold callee including the
        // LastCallToStatic bonus. While this bonus might result in code size
        // reduction, it can cause the size of a non-cold caller to increase
        // preventing it from being inlined.
        DisallowAllBonuses();
        Threshold = MinIfValid(Threshold, Params.ColdThreshold);
      }
    }
  }

  Threshold += TTI.adjustInliningThreshold(&Call);

  // Finally, take the target-specific inlining threshold multiplier into
  // account.
  Threshold *= TTI.getInliningThresholdMultiplier();

  SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
  VectorBonus = Threshold * VectorBonusPercent / 100;

  bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() &&
                                    &F == Call.getCalledFunction();
  // If there is only one call of the function, and it has internal linkage,
  // the cost of inlining it drops dramatically. It may seem odd to update
  // Cost in updateThreshold, but the bonus depends on the logic in this
  // method.
  if (OnlyOneCallAndLocalLinkage)
    Cost -= LastCallToStaticBonus;
}
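// Illustrative arithmetic (assumed defaults, not from this patch): with
// DefaultThreshold = 225, a threshold multiplier of 1, and
// SingleBBBonusPercent = 50, a plain callsite leaves updateThreshold with
// Threshold = 225 and a speculative SingleBBBonus of 112, which
// onAnalysisStart adds and onBlockAnalyzed withdraws once a second successor
// is seen.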
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrCmps;
        return true;
      }
    }
  }

  // If the comparison is an equality comparison with null, we can simplify it
  // if we know the value (argument) can't be null.
  if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
      isKnownNonNullInCallee(I.getOperand(0))) {
    bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
    SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
                                      : ConstantInt::getFalse(I.getType());
    return true;
  }

  return handleSROA(I.getOperand(0),
                    isa<ConstantPointerNull>(I.getOperand(1)));
}

bool CallAnalyzer::visitSub(BinaryOperator &I) {
  // Try to handle a special case: we can fold computing the difference of two
  // constant-related pointers.
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Value *LHSBase, *RHSBase;
  APInt LHSOffset, RHSOffset;
  std::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
  if (LHSBase) {
    std::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
    if (RHSBase && LHSBase == RHSBase) {
      // We have common bases, fold the subtract to a constant based on the
      // offsets.
      Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
      Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
      if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
        SimplifiedValues[&I] = C;
        ++NumConstantPtrDiffs;
        return true;
      }
    }
  }

  // Otherwise, fall back to the generic logic for simplifying and handling
  // instructions.
  return Base::visitSub(I);
}

bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
  Constant *CLHS = dyn_cast<Constant>(LHS);
  if (!CLHS)
    CLHS = SimplifiedValues.lookup(LHS);
  Constant *CRHS = dyn_cast<Constant>(RHS);
  if (!CRHS)
    CRHS = SimplifiedValues.lookup(RHS);

  Value *SimpleV = nullptr;
  if (auto FI = dyn_cast<FPMathOperator>(&I))
    SimpleV = SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS,
                            CRHS ? CRHS : RHS, FI->getFastMathFlags(), DL);
  else
    SimpleV =
        SimplifyBinOp(I.getOpcode(), CLHS ? CLHS : LHS, CRHS ? CRHS : RHS, DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified binary
  // operators.
  disableSROA(LHS);
  disableSROA(RHS);

  // If the instruction is floating point, and the target says this operation
  // is expensive, this may eventually become a library call. Treat the cost
  // as such. Unless it's fneg which can be implemented with an xor.
  using namespace llvm::PatternMatch;
  if (I.getType()->isFloatingPointTy() &&
      TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive &&
      !match(&I, m_FNeg(m_Value())))
    onCallPenalty();

  return false;
}

bool CallAnalyzer::visitFNeg(UnaryOperator &I) {
  Value *Op = I.getOperand(0);
  Constant *COp = dyn_cast<Constant>(Op);
  if (!COp)
    COp = SimplifiedValues.lookup(Op);

  Value *SimpleV = SimplifyFNegInst(
      COp ? COp : Op, cast<FPMathOperator>(I).getFastMathFlags(), DL);

  if (Constant *C = dyn_cast_or_null<Constant>(SimpleV))
    SimplifiedValues[&I] = C;

  if (SimpleV)
    return true;

  // Disable any SROA on arguments to arbitrary, unsimplified fneg.
  disableSROA(Op);

  return false;
}
bool CallAnalyzer::visitLoad(LoadInst &I) {
  if (handleSROA(I.getPointerOperand(), I.isSimple()))
    return true;

  // If the data is already loaded from this address and hasn't been clobbered
  // by any stores or calls, this load is likely to be redundant and can be
  // eliminated.
  if (EnableLoadElimination &&
      !LoadAddrSet.insert(I.getPointerOperand()).second && I.isUnordered()) {
    onLoadEliminationOpportunity();
    return true;
  }

  return false;
}

bool CallAnalyzer::visitStore(StoreInst &I) {
  if (handleSROA(I.getPointerOperand(), I.isSimple()))
    return true;

  // The store can potentially clobber loads and prevent repeated loads from
  // being eliminated.
  // FIXME:
  // 1. We can probably keep an initial set of eliminatable loads subtracted
  // from the cost even when we finally see a store. We just need to disable
  // *further* accumulation of elimination savings.
  // 2. We should probably at some point thread MemorySSA for the callee into
  // this and then use that to actually compute *really* precise savings.
  disableLoadElimination();
  return false;
}

bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
  // Constant folding for extract value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getExtractValue(COps[0], I.getIndices());
      }))
    return true;

  // SROA can't look through these, but they may be free.
  return Base::visitExtractValue(I);
}

bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
  // Constant folding for insert value is trivial.
  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
        return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
                                            /*InsertedValueOperand*/ COps[1],
                                            I.getIndices());
      }))
    return true;

  // SROA can't look through these, but they may be free.
  return Base::visitInsertValue(I);
}

/// Try to simplify a call site.
///
/// Takes a concrete function and callsite and tries to actually simplify it
/// by analyzing the arguments and call itself with instsimplify. Returns true
/// if it has simplified the callsite to some other entity (a constant),
/// making it free.
bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
  // FIXME: Using the instsimplify logic directly for this is inefficient
  // because we have to continually rebuild the argument list even when no
  // simplifications can be performed. Until that is fixed with remapping
  // inside of instsimplify, directly constant fold calls here.
  if (!canConstantFoldCallTo(&Call, F))
    return false;

  // Try to re-map the arguments to constants.
  SmallVector<Constant *, 4> ConstantArgs;
  ConstantArgs.reserve(Call.arg_size());
  for (Value *I : Call.args()) {
    Constant *C = dyn_cast<Constant>(I);
    if (!C)
      C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(I));
    if (!C)
      return false; // This argument doesn't map to a constant.

    ConstantArgs.push_back(C);
  }
  if (Constant *C = ConstantFoldCall(&Call, F, ConstantArgs)) {
    SimplifiedValues[&Call] = C;
    return true;
  }

  return false;
}

bool CallAnalyzer::visitCallBase(CallBase &Call) {
  if (!onCallBaseVisitStart(Call))
    return true;

  if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
      !F.hasFnAttribute(Attribute::ReturnsTwice)) {
    // This aborts the entire analysis.
    ExposesReturnsTwice = true;
    return false;
  }
  if (isa<CallInst>(Call) && cast<CallInst>(Call).cannotDuplicate())
    ContainsNoDuplicateCall = true;

  Value *Callee = Call.getCalledOperand();
  Function *F = dyn_cast_or_null<Function>(Callee);
  bool IsIndirectCall = !F;
  if (IsIndirectCall) {
    // Check if this happens to be an indirect function call to a known
    // function in this inline context. If not, we've done all we can.
    F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
    if (!F) {
      onCallArgumentSetup(Call);

      if (!Call.onlyReadsMemory())
        disableLoadElimination();
      return Base::visitCallBase(Call);
    }
  }

  assert(F && "Expected a call to a known function");
  // When we have a concrete function, first try to simplify it directly.
  if (simplifyCallSite(F, Call))
    return true;

  // Next check if it is an intrinsic we know about.
  // FIXME: Lift this into part of the InstVisitor.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Call)) {
    switch (II->getIntrinsicID()) {
    default:
      if (!Call.onlyReadsMemory() && !isAssumeLikeIntrinsic(II))
        disableLoadElimination();
      return Base::visitCallBase(Call);

    case Intrinsic::load_relative:
      onLoadRelativeIntrinsic();
      return false;

    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
      disableLoadElimination();
      // SROA can usually chew through these intrinsics, but they aren't free.
      return false;
    case Intrinsic::icall_branch_funnel:
    case Intrinsic::localescape:
      HasUninlineableIntrinsic = true;
      return false;
    case Intrinsic::vastart:
      InitsVargArgs = true;
      return false;
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
      if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
        SROAArgValues[II] = SROAArg;
      return true;
    case Intrinsic::is_constant:
      return simplifyIntrinsicCallIsConstant(Call);
    }
  }

  if (F == Call.getFunction()) {
    // This flag will fully abort the analysis, so don't bother with anything
    // else.
    IsRecursiveCall = true;
    if (!AllowRecursiveCall)
      return false;
  }

  if (TTI.isLoweredToCall(F)) {
    onLoweredCall(F, Call, IsIndirectCall);
  }

  if (!(Call.onlyReadsMemory() || (IsIndirectCall && F->onlyReadsMemory())))
    disableLoadElimination();
  return Base::visitCallBase(Call);
}

bool CallAnalyzer::visitReturnInst(ReturnInst &RI) {
  // At least one return instruction will be free after inlining.
  bool Free = !HasReturn;
  HasReturn = true;
  return Free;
}

bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
  // We model unconditional branches as essentially free -- they really
  // shouldn't exist at all, but handling them makes the behavior of the
  // inliner more regular and predictable. Interestingly, conditional branches
  // which will fold away are also free.
  return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
         isa_and_nonnull<ConstantInt>(
             SimplifiedValues.lookup(BI.getCondition()));
}

bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
  bool CheckSROA = SI.getType()->isPointerTy();
  Value *TrueVal = SI.getTrueValue();
  Value *FalseVal = SI.getFalseValue();

  Constant *TrueC = dyn_cast<Constant>(TrueVal);
  if (!TrueC)
    TrueC = SimplifiedValues.lookup(TrueVal);
  Constant *FalseC = dyn_cast<Constant>(FalseVal);
  if (!FalseC)
    FalseC = SimplifiedValues.lookup(FalseVal);
  Constant *CondC =
      dyn_cast_or_null<Constant>(SimplifiedValues.lookup(SI.getCondition()));

  if (!CondC) {
    // Select C, X, X => X
    if (TrueC == FalseC && TrueC) {
      SimplifiedValues[&SI] = TrueC;
      return true;
    }

    if (!CheckSROA)
      return Base::visitSelectInst(SI);

    std::pair<Value *, APInt> TrueBaseAndOffset =
        ConstantOffsetPtrs.lookup(TrueVal);
    std::pair<Value *, APInt> FalseBaseAndOffset =
        ConstantOffsetPtrs.lookup(FalseVal);
    if (TrueBaseAndOffset == FalseBaseAndOffset && TrueBaseAndOffset.first) {
      ConstantOffsetPtrs[&SI] = TrueBaseAndOffset;

      if (auto *SROAArg = getSROAArgForValueOrNull(TrueVal))
        SROAArgValues[&SI] = SROAArg;
      return true;
    }

    return Base::visitSelectInst(SI);
  }

  // Select condition is a constant.
  Value *SelectedV = CondC->isAllOnesValue()
                         ? TrueVal
                         : (CondC->isNullValue()) ? FalseVal : nullptr;
  if (!SelectedV) {
    // Condition is a vector constant that is not all 1s or all 0s. If all
    // operands are constants, ConstantExpr::getSelect() can handle the cases
    // such as select vectors.
if (TrueC && FalseC) { if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) { SimplifiedValues[&SI] = C; return true; } } return Base::visitSelectInst(SI); } // Condition is either all 1s or all 0s. SI can be simplified. if (Constant *SelectedC = dyn_cast<Constant>(SelectedV)) { SimplifiedValues[&SI] = SelectedC; return true; } if (!CheckSROA) return true; std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(SelectedV); if (BaseAndOffset.first) { ConstantOffsetPtrs[&SI] = BaseAndOffset; if (auto *SROAArg = getSROAArgForValueOrNull(SelectedV)) SROAArgValues[&SI] = SROAArg; } return true; } bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { // We model unconditional switches as free, see the comments on handling // branches. if (isa<ConstantInt>(SI.getCondition())) return true; if (Value *V = SimplifiedValues.lookup(SI.getCondition())) if (isa<ConstantInt>(V)) return true; // Assume the most general case where the switch is lowered into // either a jump table, bit test, or a balanced binary tree consisting of // case clusters without merging adjacent clusters with the same // destination. We do not consider the switches that are lowered with a mix // of jump table/bit test/binary search tree. The cost of the switch is // proportional to the size of the tree or the size of jump table range. // // NB: We convert large switches which are just used to initialize large phi // nodes to lookup tables instead in simplifycfg, so this shouldn't prevent // inlining those. It will prevent inlining in cases where the optimization // does not (yet) fire. unsigned JumpTableSize = 0; BlockFrequencyInfo *BFI = GetBFI ? &(GetBFI(F)) : nullptr; unsigned NumCaseCluster = TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize, PSI, BFI); onFinalizeSwitch(JumpTableSize, NumCaseCluster); return false; } bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { // We never want to inline functions that contain an indirectbr. This is // incorrect because all the blockaddress's (in static global initializers // for example) would be referring to the original function, and this // indirect jump would jump from the inlined copy of the function into the // original function which is extremely undefined behavior. // FIXME: This logic isn't really right; we can safely inline functions with // indirectbr's as long as no other function or global references the // blockaddress of a block within the current function. HasIndirectBr = true; return false; } bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { // FIXME: It's not clear that a single instruction is an accurate model for // the inline cost of a resume instruction. return false; } bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) { // FIXME: It's not clear that a single instruction is an accurate model for // the inline cost of a cleanupret instruction. return false; } bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) { // FIXME: It's not clear that a single instruction is an accurate model for // the inline cost of a catchret instruction. return false; } bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { // FIXME: It might be reasonable to discount the cost of instructions leading // to unreachable as they have the lowest possible impact on both runtime and // code size. return true; // No actual code is needed for unreachable. } bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc.
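// For instance (illustrative, ours): no-op bitcasts, and GEPs that fold into // a load/store addressing mode, are commonly reported as TCC_Free by targets.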
if (TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free) return true; // We found something we don't understand or can't handle. Mark any SROA-able // values in the operand list as no longer viable. for (const Use &Op : I.operands()) disableSROA(Op); return false; } /// Analyze a basic block for its contribution to the inline cost. /// /// This method walks the analyzer over every instruction in the given basic /// block and accounts for their cost during inlining at this callsite. It /// aborts early if the threshold has been exceeded or an impossible to inline /// construct has been detected. It returns false if inlining is no longer /// viable, and true if inlining remains viable. InlineResult CallAnalyzer::analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues) { for (Instruction &I : *BB) { // FIXME: Currently, the number of instructions in a function regardless of // our ability to simplify them during inline to constants or dead code, // are actually used by the vector bonus heuristic. As long as that's true, // we have to special case debug intrinsics here to prevent differences in // inlining due to debug symbols. Eventually, the number of unsimplified // instructions shouldn't factor into the cost computation, but until then, // hack around it here. // Similarly, skip pseudo-probes. if (I.isDebugOrPseudoInst()) continue; // Skip ephemeral values. if (EphValues.count(&I)) continue; ++NumInstructions; if (isa<ExtractElementInst>(I) || I.getType()->isVectorTy()) ++NumVectorInstructions; // If the instruction simplified to a constant, there is no cost to this // instruction. Visit the instructions using our InstVisitor to account for // all of the per-instruction logic. The visit tree returns true if we // consumed the instruction in any way, and false if the instruction's base // cost should count against inlining. onInstructionAnalysisStart(&I); if (Base::visit(&I)) ++NumInstructionsSimplified; else onMissedSimplification(); onInstructionAnalysisFinish(&I); using namespace ore; // If visiting this instruction detected an uninlinable pattern, abort. InlineResult IR = InlineResult::success(); if (IsRecursiveCall && !AllowRecursiveCall) IR = InlineResult::failure("recursive"); else if (ExposesReturnsTwice) IR = InlineResult::failure("exposes returns twice"); else if (HasDynamicAlloca) IR = InlineResult::failure("dynamic alloca"); else if (HasIndirectBr) IR = InlineResult::failure("indirect branch"); else if (HasUninlineableIntrinsic) IR = InlineResult::failure("uninlinable intrinsic"); else if (InitsVargArgs) IR = InlineResult::failure("varargs"); if (!IR.isSuccess()) { if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CandidateCall) << NV("Callee", &F) << " has uninlinable pattern (" << NV("InlineResult", IR.getFailureReason()) << ") and cost is not fully computed"; }); return IR; } // If the caller is a recursive function then we don't want to inline // functions which allocate a lot of stack space because it would increase // the caller stack usage dramatically. if (IsCallerRecursive && AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) { auto IR = InlineResult::failure("recursive and allocates too much stack space"); if (ORE) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", &CandidateCall) << NV("Callee", &F) << " is " << NV("InlineResult", IR.getFailureReason()) << ". 
Cost is not fully computed"; }); return IR; } if (shouldStop()) return InlineResult::failure( "Call site analysis is not favorable to inlining."); } return InlineResult::success(); } /// Compute the base pointer and cumulative constant offsets for V. /// /// This strips all constant offsets off of V, leaving it the base pointer, and /// accumulates the total constant offset applied in the returned constant. It /// returns nullptr if V is not a pointer, and returns the constant '0' if there /// are no constant offsets applied. ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { if (!V->getType()->isPointerTy()) return nullptr; unsigned AS = V->getType()->getPointerAddressSpace(); unsigned IntPtrWidth = DL.getIndexSizeInBits(AS); APInt Offset = APInt::getZero(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. SmallPtrSet<Value *, 4> Visited; Visited.insert(V); do { if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) return nullptr; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast<Operator>(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { if (GA->isInterposable()) break; V = GA->getAliasee(); } else { break; } assert(V->getType()->isPointerTy() && "Unexpected operand type!"); } while (Visited.insert(V).second); Type *IdxPtrTy = DL.getIndexType(V->getType()); return cast<ConstantInt>(ConstantInt::get(IdxPtrTy, Offset)); } /// Find dead blocks due to deleted CFG edges during inlining. /// /// If we know the successor of the current block, \p CurrBB, has to be \p /// NextBB, the other successors of \p CurrBB are dead if these successors have /// no live incoming CFG edges. If one block is found to be dead, we can /// continue growing the dead block list by checking the successors of the dead /// blocks to see if all their incoming edges are dead or not. void CallAnalyzer::findDeadBlocks(BasicBlock *CurrBB, BasicBlock *NextBB) { auto IsEdgeDead = [&](BasicBlock *Pred, BasicBlock *Succ) { // A CFG edge is dead if the predecessor is dead or the predecessor has a // known successor which is not the one under exam. return (DeadBlocks.count(Pred) || (KnownSuccessors[Pred] && KnownSuccessors[Pred] != Succ)); }; auto IsNewlyDead = [&](BasicBlock *BB) { // If all the edges to a block are dead, the block is also dead. return (!DeadBlocks.count(BB) && llvm::all_of(predecessors(BB), [&](BasicBlock *P) { return IsEdgeDead(P, BB); })); }; for (BasicBlock *Succ : successors(CurrBB)) { if (Succ == NextBB || !IsNewlyDead(Succ)) continue; SmallVector<BasicBlock *, 4> NewDead; NewDead.push_back(Succ); while (!NewDead.empty()) { BasicBlock *Dead = NewDead.pop_back_val(); if (DeadBlocks.insert(Dead)) // Continue growing the dead block lists. for (BasicBlock *S : successors(Dead)) if (IsNewlyDead(S)) NewDead.push_back(S); } } } /// Analyze a call site for potential inlining. /// /// Returns true if inlining this call is viable, and false if it is not /// viable. It computes the cost and adjusts the threshold based on numerous /// factors and heuristics. If this method returns false but the computed cost /// is below the computed threshold, then inlining was forcibly disabled by /// some artifact of the routine.
InlineResult CallAnalyzer::analyze() { ++NumCallsAnalyzed; auto Result = onAnalysisStart(); if (!Result.isSuccess()) return Result; if (F.empty()) return InlineResult::success(); Function *Caller = CandidateCall.getFunction(); // Check if the caller function is recursive itself. for (User *U : Caller->users()) { CallBase *Call = dyn_cast<CallBase>(U); if (Call && Call->getFunction() == Caller) { IsCallerRecursive = true; break; } } // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. auto CAI = CandidateCall.arg_begin(); for (Argument &FAI : F.args()) { assert(CAI != CandidateCall.arg_end()); if (Constant *C = dyn_cast<Constant>(CAI)) SimplifiedValues[&FAI] = C; Value *PtrArg = *CAI; if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { ConstantOffsetPtrs[&FAI] = std::make_pair(PtrArg, C->getValue()); // We can SROA any pointer arguments derived from alloca instructions. if (auto *SROAArg = dyn_cast<AllocaInst>(PtrArg)) { SROAArgValues[&FAI] = SROAArg; onInitializeSROAArg(SROAArg); EnabledSROAAllocas.insert(SROAArg); } } ++CAI; } NumConstantArgs = SimplifiedValues.size(); NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); NumAllocaArgs = SROAArgValues.size(); // FIXME: If a caller has multiple calls to a callee, we end up recomputing // the ephemeral values multiple times (and they're completely determined by // the callee, so this is purely duplicate work). SmallPtrSet<const Value *, 32> EphValues; CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues); // The worklist of live basic blocks in the callee *after* inlining. We avoid // adding basic blocks of the callee which can be proven to be dead for this // particular call site in order to get more accurate cost estimates. This // requires a somewhat heavyweight iteration pattern: we need to walk the // basic blocks in a breadth-first order as we insert live successors. To // accomplish this, prioritizing for small iterations because we exit after // crossing our threshold, we use a small-size optimized SetVector. typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>, SmallPtrSet<BasicBlock *, 16>> BBSetVector; BBSetVector BBWorklist; BBWorklist.insert(&F.getEntryBlock()); // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { if (shouldStop()) break; BasicBlock *BB = BBWorklist[Idx]; if (BB->empty()) continue; onBlockStart(BB); // Disallow inlining a blockaddress with uses other than strictly callbr. // A blockaddress only has defined behavior for an indirect branch in the // same function, and we do not currently support inlining indirect // branches. But, the inliner may not see an indirect branch that ends up // being dead code at a particular call site. If the blockaddress escapes // the function, e.g., via a global variable, inlining may lead to an // invalid cross-function reference. // FIXME: pr/39560: continue relaxing this overt restriction. if (BB->hasAddressTaken()) for (User *U : BlockAddress::get(&*BB)->users()) if (!isa<CallBrInst>(*U)) return InlineResult::failure("blockaddress used outside of callbr"); // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail out. InlineResult IR = analyzeBlock(BB, EphValues); if (!IR.isSuccess()) return IR; Instruction *TI = BB->getTerminator(); // Add in the live successors by first checking whether we have a terminator // that may be simplified based on the values simplified by this call.
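// Illustrative example (not from this patch): if the callee block ends with // br i1 %c, label %then, label %else // and %c simplified to true via a constant argument, only %then is enqueued // below and %else can then be discovered dead by findDeadBlocks().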
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { if (BI->isConditional()) { Value *Cond = BI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { BasicBlock *NextBB = BI->getSuccessor(SimpleCond->isZero() ? 1 : 0); BBWorklist.insert(NextBB); KnownSuccessors[BB] = NextBB; findDeadBlocks(BB, NextBB); continue; } } } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Value *Cond = SI->getCondition(); if (ConstantInt *SimpleCond = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) { BasicBlock *NextBB = SI->findCaseValue(SimpleCond)->getCaseSuccessor(); BBWorklist.insert(NextBB); KnownSuccessors[BB] = NextBB; findDeadBlocks(BB, NextBB); continue; } } // If we're unable to select a particular successor, just count all of // them. for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx) BBWorklist.insert(TI->getSuccessor(TIdx)); onBlockAnalyzed(BB); } bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() && &F == CandidateCall.getCalledFunction(); // If this is a noduplicate call, we can still inline as long as // inlining this would cause the removal of the caller (so the instruction // is not actually duplicated, just moved). if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) return InlineResult::failure("noduplicate"); return finalizeAnalysis(); } void InlineCostCallAnalyzer::print(raw_ostream &OS) { #define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n" if (PrintInstructionComments) F.print(OS, &Writer); DEBUG_PRINT_STAT(NumConstantArgs); DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); DEBUG_PRINT_STAT(NumAllocaArgs); DEBUG_PRINT_STAT(NumConstantPtrCmps); DEBUG_PRINT_STAT(NumConstantPtrDiffs); DEBUG_PRINT_STAT(NumInstructionsSimplified); DEBUG_PRINT_STAT(NumInstructions); DEBUG_PRINT_STAT(SROACostSavings); DEBUG_PRINT_STAT(SROACostSavingsLost); DEBUG_PRINT_STAT(LoadEliminationCost); DEBUG_PRINT_STAT(ContainsNoDuplicateCall); DEBUG_PRINT_STAT(Cost); DEBUG_PRINT_STAT(Threshold); #undef DEBUG_PRINT_STAT } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Dump stats about this call's analysis. LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); } #endif /// Test that there are no attribute conflicts between Caller and Callee /// that prevent inlining. static bool functionsHaveCompatibleAttributes( Function *Caller, Function *Callee, TargetTransformInfo &TTI, function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) { // Note that CalleeTLI must be a copy not a reference. The legacy pass manager // caches the most recently created TLI in the TargetLibraryInfoWrapperPass // object, and always returns the same object (which is overwritten on each // GetTLI call). Therefore we copy the first result. auto CalleeTLI = GetTLI(*Callee); return TTI.areInlineCompatible(Caller, Callee) && GetTLI(*Caller).areInlineCompatible(CalleeTLI, InlineCallerSupersetNoBuiltin) && AttributeFuncs::areInlineCompatible(*Caller, *Callee); } int llvm::getCallsiteCost(CallBase &Call, const DataLayout &DL) { int Cost = 0; for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) { if (Call.isByValArgument(I)) { // We approximate the number of loads and stores needed by dividing the // size of the byval type by the target's pointer size. PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType()); unsigned TypeSize = DL.getTypeSizeInBits(Call.getParamByValType(I)); unsigned AS = PTy->getAddressSpace(); unsigned PointerSize = DL.getPointerSizeInBits(AS); // Ceiling division.
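// Worked example (illustrative, ours): a 96-bit byval struct on a target with // 64-bit pointers gives NumStores = (96 + 64 - 1) / 64 = 2, i.e. two word copies.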
unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; // If it generates more than 8 stores it is likely to be expanded as an // inline memcpy so we take that as an upper bound. Otherwise we assume // one load and one store per word copied. // FIXME: The maxStoresPerMemcpy setting from the target should be used // here instead of a magic number of 8, but it's not available via // DataLayout. NumStores = std::min(NumStores, 8U); Cost += 2 * NumStores * InlineConstants::InstrCost; } else { // For non-byval arguments subtract off one instruction per call // argument. Cost += InlineConstants::InstrCost; } } // The call instruction also disappears after inlining. Cost += InlineConstants::InstrCost + CallPenalty; return Cost; } InlineCost llvm::getInlineCost( CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref<AssumptionCache &(Function &)> GetAssumptionCache, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { return getInlineCost(Call, Call.getCalledFunction(), Params, CalleeTTI, GetAssumptionCache, GetTLI, GetBFI, PSI, ORE); } Optional<int> llvm::getInliningCostEstimate( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref<AssumptionCache &(Function &)> GetAssumptionCache, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { const InlineParams Params = {/* DefaultThreshold*/ 0, /*HintThreshold*/ {}, /*ColdThreshold*/ {}, /*OptSizeThreshold*/ {}, /*OptMinSizeThreshold*/ {}, /*HotCallSiteThreshold*/ {}, /*LocallyHotCallSiteThreshold*/ {}, /*ColdCallSiteThreshold*/ {}, /*ComputeFullInlineCost*/ true, /*EnableDeferral*/ true}; InlineCostCallAnalyzer CA(*Call.getCalledFunction(), Call, Params, CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, true, /*IgnoreThreshold*/ true); auto R = CA.analyze(); if (!R.isSuccess()) return None; return CA.getCost(); } Optional<InlineCostFeatures> llvm::getInliningCostFeatures( CallBase &Call, TargetTransformInfo &CalleeTTI, function_ref<AssumptionCache &(Function &)> GetAssumptionCache, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { InlineCostFeaturesAnalyzer CFA(CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE, *Call.getCalledFunction(), Call); auto R = CFA.analyze(); if (!R.isSuccess()) return None; return CFA.features(); } Optional<InlineResult> llvm::getAttributeBasedInliningDecision( CallBase &Call, Function *Callee, TargetTransformInfo &CalleeTTI, function_ref<const TargetLibraryInfo &(Function &)> GetTLI) { // Cannot inline indirect calls. if (!Callee) return InlineResult::failure("indirect call"); // When a callee coroutine is inlined into a caller coroutine before the // coro-split pass, the coro-early pass cannot handle it quite well. So we // won't inline a coroutine function that has not been split yet. if (Callee->isPresplitCoroutine()) return InlineResult::failure("unsplited coroutine call"); // Never inline calls with byval arguments that do not have the alloca // address space. Since byval arguments can be replaced with a copy to an // alloca, the inlined code would need to be adjusted to handle that the // argument is in the alloca address space (so it is a little bit complicated // to solve). unsigned AllocaAS = Callee->getParent()->getDataLayout().getAllocaAddrSpace(); for (unsigned I = 0, E = Call.arg_size(); I != E; ++I) if (Call.isByValArgument(I)) { PointerType *PTy = cast<PointerType>(Call.getArgOperand(I)->getType()); if (PTy->getAddressSpace() != AllocaAS) return InlineResult::failure("byval arguments without alloca" " address space"); } // Calls to functions with always-inline attributes should be inlined // whenever possible.
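// E.g. (illustrative, ours): a C callee declared __attribute__((always_inline)) // is inlined regardless of cost, provided isInlineViable() finds no blocking // construct such as an indirectbr or an escaping blockaddress.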
if (Call.hasFnAttr(Attribute::AlwaysInline)) { auto IsViable = isInlineViable(*Callee); if (IsViable.isSuccess()) return InlineResult::success(); return InlineResult::failure(IsViable.getFailureReason()); } // Never inline functions with conflicting attributes (unless callee has // always-inline attribute). Function *Caller = Call.getCaller(); if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI)) return InlineResult::failure("conflicting attributes"); // Don't inline this call if the caller has the optnone attribute. if (Caller->hasOptNone()) return InlineResult::failure("optnone attribute"); // Don't inline a function that treats null pointer as valid into a caller // that does not have this attribute. if (!Caller->nullPointerIsDefined() && Callee->nullPointerIsDefined()) return InlineResult::failure("nullptr definitions incompatible"); // Don't inline functions which can be interposed at link-time. if (Callee->isInterposable()) return InlineResult::failure("interposable"); // Don't inline functions marked noinline. if (Callee->hasFnAttribute(Attribute::NoInline)) return InlineResult::failure("noinline function attribute"); // Don't inline call sites marked noinline. if (Call.isNoInline()) return InlineResult::failure("noinline call site attribute"); return None; } InlineCost llvm::getInlineCost( CallBase &Call, Function *Callee, const InlineParams &Params, TargetTransformInfo &CalleeTTI, function_ref<AssumptionCache &(Function &)> GetAssumptionCache, function_ref<const TargetLibraryInfo &(Function &)> GetTLI, function_ref<BlockFrequencyInfo &(Function &)> GetBFI, ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE) { auto UserDecision = llvm::getAttributeBasedInliningDecision(Call, Callee, CalleeTTI, GetTLI); if (UserDecision.hasValue()) { if (UserDecision->isSuccess()) return llvm::InlineCost::getAlways("always inline attribute"); return llvm::InlineCost::getNever(UserDecision->getFailureReason()); } LLVM_DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName() << "... (caller:" << Call.getCaller()->getName() << ")\n"); InlineCostCallAnalyzer CA(*Callee, Call, Params, CalleeTTI, GetAssumptionCache, GetBFI, PSI, ORE); InlineResult ShouldInline = CA.analyze(); LLVM_DEBUG(CA.dump()); // Always make cost benefit based decision explicit. // We use always/never here since threshold is not meaningful, // as it's not what drives cost-benefit analysis. if (CA.wasDecidedByCostBenefit()) { if (ShouldInline.isSuccess()) return InlineCost::getAlways("benefit over cost", CA.getCostBenefitPair()); else return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair()); } if (CA.wasDecidedByCostThreshold()) return InlineCost::get(CA.getCost(), CA.getThreshold()); // No details on how the decision was made, simply return always or never. return ShouldInline.isSuccess() ? InlineCost::getAlways("empty function") : InlineCost::getNever(ShouldInline.getFailureReason()); } InlineResult llvm::isInlineViable(Function &F) { bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); for (BasicBlock &BB : F) { // Disallow inlining of functions which contain indirect branches. if (isa<IndirectBrInst>(BB.getTerminator())) return InlineResult::failure("contains indirect branches"); // Disallow inlining of blockaddresses which are used by non-callbr // instructions. if (BB.hasAddressTaken()) for (User *U : BlockAddress::get(&BB)->users()) if (!isa<CallBrInst>(*U)) return InlineResult::failure("blockaddress used outside of callbr"); for (auto &II : BB) { CallBase *Call = dyn_cast<CallBase>(&II); if (!Call) continue; // Disallow recursive calls.
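// Illustrative (not from this patch): a directly self-recursive callee, e.g. // int f(int n) { return n ? n + f(n - 1) : 0; } // can never be fully inlined into itself, so it is rejected just below.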
Function *Callee = Call->getCalledFunction(); if (&F == Callee) return InlineResult::failure("recursive call"); // Disallow calls which expose returns-twice to a function not previously // attributed as such. if (!ReturnsTwice && isa<CallInst>(Call) && cast<CallInst>(Call)->canReturnTwice()) return InlineResult::failure("exposes returns-twice attribute"); if (Callee) switch (Callee->getIntrinsicID()) { default: break; case llvm::Intrinsic::icall_branch_funnel: // Disallow inlining of @llvm.icall.branch.funnel because current // backend can't separate call targets from call arguments. return InlineResult::failure( "disallowed inlining of @llvm.icall.branch.funnel"); case llvm::Intrinsic::localescape: // Disallow inlining functions that call @llvm.localescape. Doing this // correctly would require major changes to the inliner. return InlineResult::failure( "disallowed inlining of @llvm.localescape"); case llvm::Intrinsic::vastart: // Disallow inlining of functions that initialize VarArgs with // va_start. return InlineResult::failure( "contains VarArgs initialized with va_start"); } } } return InlineResult::success(); } // APIs to create InlineParams based on command line flags and/or other // parameters. InlineParams llvm::getInlineParams(int Threshold) { InlineParams Params; // This field is the threshold to use for a callee by default. This is // derived from one or more of: // * optimization or size-optimization levels, // * a value passed to createFunctionInliningPass function, or // * the -inline-threshold flag. // If the -inline-threshold flag is explicitly specified, that is used // irrespective of anything else. if (InlineThreshold.getNumOccurrences() > 0) Params.DefaultThreshold = InlineThreshold; else Params.DefaultThreshold = Threshold; // Set the HintThreshold knob from the -inlinehint-threshold. Params.HintThreshold = HintThreshold; // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold. Params.HotCallSiteThreshold = HotCallSiteThreshold; // If the -locally-hot-callsite-threshold is explicitly specified, use it to // populate LocallyHotCallSiteThreshold. Later, we populate // Params.LocallyHotCallSiteThreshold from -locally-hot-callsite-threshold if // we know that optimization level is O3 (in the getInlineParams variant that // takes the opt and size levels). // FIXME: Remove this check (and make the assignment unconditional) after // addressing size regression issues at O2. if (LocallyHotCallSiteThreshold.getNumOccurrences() > 0) Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; // Set the ColdCallSiteThreshold knob from the // -inline-cold-callsite-threshold. Params.ColdCallSiteThreshold = ColdCallSiteThreshold; // Set the OptMinSizeThreshold and OptSizeThreshold params only if the // -inlinehint-threshold commandline option is not explicitly given. If that // option is present, then its value applies even for callees with size and // minsize attributes. // If the -inline-threshold is not specified, set the ColdThreshold from the // -inlinecold-threshold even if it is not explicitly passed.
If // -inline-threshold is specified, then -inlinecold-threshold needs to be // explicitly specified to set the ColdThreshold knob. if (InlineThreshold.getNumOccurrences() == 0) { Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold; Params.OptSizeThreshold = InlineConstants::OptSizeThreshold; Params.ColdThreshold = ColdThreshold; } else if (ColdThreshold.getNumOccurrences() > 0) { Params.ColdThreshold = ColdThreshold; } return Params; } InlineParams llvm::getInlineParams() { return getInlineParams(DefaultThreshold); } // Compute the default threshold for inlining based on the opt level and the // size opt level. static int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel) { if (OptLevel > 2) return InlineConstants::OptAggressiveThreshold; if (SizeOptLevel == 1) // -Os return InlineConstants::OptSizeThreshold; if (SizeOptLevel == 2) // -Oz return InlineConstants::OptMinSizeThreshold; return DefaultThreshold; } InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) { auto Params = getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel)); // At O3, use the value of -locally-hot-callsite-threshold option to populate // Params.LocallyHotCallSiteThreshold. Below O3, this flag has effect only // when it is specified explicitly. if (OptLevel > 2) Params.LocallyHotCallSiteThreshold = LocallyHotCallSiteThreshold; return Params; } PreservedAnalyses InlineCostAnnotationPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { PrintInstructionComments = true; std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](Function &F) -> AssumptionCache & { return FAM.getResult<AssumptionAnalysis>(F); }; Module *M = F.getParent(); ProfileSummaryInfo PSI(*M); DataLayout DL(M); TargetTransformInfo TTI(DL); // FIXME: Redesign the usage of InlineParams to expand the scope of this pass. // In the current implementation, the type of InlineParams doesn't matter as // the pass serves only for verification of inliner's decisions. // We can add a flag which determines InlineParams for this run. Right now, // the default InlineParams are used. const InlineParams Params = llvm::getInlineParams(); for (BasicBlock &BB : F) { for (Instruction &I : BB) { if (CallInst *CI = dyn_cast<CallInst>(&I)) { Function *CalledFunction = CI->getCalledFunction(); if (!CalledFunction || CalledFunction->isDeclaration()) continue; OptimizationRemarkEmitter ORE(CalledFunction); InlineCostCallAnalyzer ICCA(*CalledFunction, *CI, Params, TTI, GetAssumptionCache, nullptr, &PSI, &ORE); ICCA.analyze(); OS << "      Analyzing call of " << CalledFunction->getName() << "... (caller:" << CI->getCaller()->getName() << ")\n"; ICCA.print(OS); OS << "\n"; } } } return PreservedAnalyses::all(); } diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 0dbbc218e946..bc03776bde19 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1,1833 +1,1845 @@ //===- MachineSink.cpp - Sinking for machine instructions -----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This pass moves instructions into successor blocks when possible, so that // they aren't executed on paths where their results aren't needed. // // This pass is not intended to be a replacement or a complete alternative // for an LLVM-IR-level sinking pass.
It is only designed to sink simple // constructs that are not exposed before lowering and instruction selection. // //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Pass.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> #include <cstdint> #include <map> #include <utility> #include <vector> using namespace llvm; #define DEBUG_TYPE "machine-sink" static cl::opt<bool> SplitEdges("machine-sink-split", cl::desc("Split critical edges during machine sinking"), cl::init(true), cl::Hidden); static cl::opt<bool> UseBlockFreqInfo("machine-sink-bfi", cl::desc("Use block frequency info to find successors to sink"), cl::init(true), cl::Hidden); static cl::opt<unsigned> SplitEdgeProbabilityThreshold( "machine-sink-split-probability-threshold", cl::desc( "Percentage threshold for splitting single-instruction critical edge. 
" "If the branch threshold is higher than this threshold, we allow " "speculative execution of up to 1 instruction to avoid branching to " "splitted critical edge"), cl::init(40), cl::Hidden); static cl::opt SinkLoadInstsPerBlockThreshold( "machine-sink-load-instrs-threshold", cl::desc("Do not try to find alias store for a load if there is a in-path " "block whose instruction number is higher than this threshold."), cl::init(2000), cl::Hidden); static cl::opt SinkLoadBlocksThreshold( "machine-sink-load-blocks-threshold", cl::desc("Do not try to find alias store for a load if the block number in " "the straight line is higher than this threshold."), cl::init(20), cl::Hidden); static cl::opt SinkInstsIntoLoop("sink-insts-to-avoid-spills", cl::desc("Sink instructions into loops to avoid " "register spills"), cl::init(false), cl::Hidden); static cl::opt SinkIntoLoopLimit( "machine-sink-loop-limit", cl::desc("The maximum number of instructions considered for loop sinking."), cl::init(50), cl::Hidden); STATISTIC(NumSunk, "Number of machine instructions sunk"); STATISTIC(NumLoopSunk, "Number of machine instructions sunk into a loop"); STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); namespace { class MachineSinking : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; // Machine register information MachineDominatorTree *DT; // Machine dominator tree MachinePostDominatorTree *PDT; // Machine post dominator tree MachineLoopInfo *LI; MachineBlockFrequencyInfo *MBFI; const MachineBranchProbabilityInfo *MBPI; AliasAnalysis *AA; RegisterClassInfo RegClassInfo; // Remember which edges have been considered for breaking. SmallSet, 8> CEBCandidates; // Remember which edges we are about to split. // This is different from CEBCandidates since those edges // will be split. SetVector> ToSplit; DenseSet RegsToClearKillFlags; using AllSuccsCache = std::map>; /// DBG_VALUE pointer and flag. The flag is true if this DBG_VALUE is /// post-dominated by another DBG_VALUE of the same variable location. /// This is necessary to detect sequences such as: /// %0 = someinst /// DBG_VALUE %0, !123, !DIExpression() /// %1 = anotherinst /// DBG_VALUE %1, !123, !DIExpression() /// Where if %0 were to sink, the DBG_VAUE should not sink with it, as that /// would re-order assignments. using SeenDbgUser = PointerIntPair; /// Record of DBG_VALUE uses of vregs in a block, so that we can identify /// debug instructions to sink. SmallDenseMap> SeenDbgUsers; /// Record of debug variables that have had their locations set in the /// current block. DenseSet SeenDbgVars; std::map, bool> HasStoreCache; std::map, std::vector> StoreInstrCache; /// Cached BB's register pressure. 
std::map<MachineBasicBlock *, std::vector<unsigned>> CachedRegisterPressure; public: static char ID; // Pass identification MachineSinking() : MachineFunctionPass(ID) { initializeMachineSinkingPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<MachineDominatorTree>(); AU.addRequired<MachinePostDominatorTree>(); AU.addRequired<MachineLoopInfo>(); AU.addRequired<MachineBranchProbabilityInfo>(); AU.addPreserved<MachineLoopInfo>(); if (UseBlockFreqInfo) AU.addRequired<MachineBlockFrequencyInfo>(); } void releaseMemory() override { CEBCandidates.clear(); } private: bool ProcessBlock(MachineBasicBlock &MBB); void ProcessDbgInst(MachineInstr &MI); bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To); bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To, MachineInstr &MI); /// Postpone the splitting of the given critical /// edge (\p From, \p To). /// /// We do not split the edges on the fly. Indeed, this invalidates /// the dominance information and thus triggers a lot of updates /// of that information underneath. /// Instead, we postpone all the splits after each iteration of /// the main loop. That way, the information is at least valid /// for the lifetime of an iteration. /// /// \return True if the edge is marked as toSplit, false otherwise. /// False can be returned if, for instance, this is not profitable. bool PostponeSplitCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To, bool BreakPHIEdge); bool SinkInstruction(MachineInstr &MI, bool &SawStore, AllSuccsCache &AllSuccessors); /// If we sink a COPY inst, some debug users of its destination may no /// longer be dominated by the COPY, and will eventually be dropped. /// This is easily rectified by forwarding the non-dominated debug uses /// to the copy source.
void SalvageUnsunkDebugUsersOfCopy(MachineInstr &, MachineBasicBlock *TargetBlock); bool AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const; MachineBasicBlock *FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); void FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, SmallVectorImpl<MachineInstr *> &Candidates); bool SinkIntoLoop(MachineLoop *L, MachineInstr &I); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *SuccToSinkTo, AllSuccsCache &AllSuccessors); bool PerformTrivialForwardCoalescing(MachineInstr &MI, MachineBasicBlock *MBB); SmallVector<MachineBasicBlock *, 4> & GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) const; std::vector<unsigned> &getBBRegisterPressure(MachineBasicBlock &MBB); }; } // end anonymous namespace char MachineSinking::ID = 0; char &llvm::MachineSinkingID = MachineSinking::ID; INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, "Machine code sinking", false, false) bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, MachineBasicBlock *MBB) { if (!MI.isCopy()) return false; Register SrcReg = MI.getOperand(1).getReg(); Register DstReg = MI.getOperand(0).getReg(); if (!Register::isVirtualRegister(SrcReg) || !Register::isVirtualRegister(DstReg) || !MRI->hasOneNonDBGUse(SrcReg)) return false; const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); const TargetRegisterClass *DRC = MRI->getRegClass(DstReg); if (SRC != DRC) return false; MachineInstr *DefMI = MRI->getVRegDef(SrcReg); if (DefMI->isCopyLike()) return false; LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI); LLVM_DEBUG(dbgs() << "*** to: " << MI); MRI->replaceRegWith(DstReg, SrcReg); MI.eraseFromParent(); // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. MRI->clearKillFlags(SrcReg); ++NumCoalesces; return true; } /// AllUsesDominatedByBlock - Return true if all uses of the specified register /// occur in blocks dominated by the specified block. If any use is in the /// definition block, then return false since it is never legal to move def /// after uses. bool MachineSinking::AllUsesDominatedByBlock(Register Reg, MachineBasicBlock *MBB, MachineBasicBlock *DefMBB, bool &BreakPHIEdge, bool &LocalUse) const { assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs"); // Ignore debug uses because debug info doesn't affect the code. if (MRI->use_nodbg_empty(Reg)) return true; // BreakPHIEdge is true if all the uses are in the successor MBB being sunk // into and they are all PHI nodes. In this case, machine-sink must break // the critical edge first. e.g. // // %bb.1: // Predecessors according to CFG: %bb.0 // ... // %def = DEC64_32r %x, implicit-def dead %eflags // ...
// JE_4 <%bb.37>, implicit %eflags // Successors according to CFG: %bb.37 %bb.2 // // %bb.2: // %p = PHI %y, %bb.0, %def, %bb.1 if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) { MachineInstr *UseInst = MO.getParent(); unsigned OpNo = UseInst->getOperandNo(&MO); MachineBasicBlock *UseBlock = UseInst->getParent(); return UseBlock == MBB && UseInst->isPHI() && UseInst->getOperand(OpNo + 1).getMBB() == DefMBB; })) { BreakPHIEdge = true; return true; } for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { // Determine the block of the use. MachineInstr *UseInst = MO.getParent(); unsigned OpNo = &MO - &UseInst->getOperand(0); MachineBasicBlock *UseBlock = UseInst->getParent(); if (UseInst->isPHI()) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. UseBlock = UseInst->getOperand(OpNo+1).getMBB(); } else if (UseBlock == DefMBB) { LocalUse = true; return false; } // Check that it dominates. if (!DT->dominates(MBB, UseBlock)) return false; } return true; } /// Return true if this machine instruction loads from global offset table or /// constant pool. static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { assert(MI.mayLoad() && "Expected MI that loads!"); // If we lost memory operands, conservatively assume that the instruction // reads from everything. if (MI.memoperands_empty()) return true; for (MachineMemOperand *MemOp : MI.memoperands()) if (const PseudoSourceValue *PSV = MemOp->getPseudoValue()) if (PSV->isGOT() || PSV->isConstantPool()) return true; return false; } void MachineSinking::FindLoopSinkCandidates(MachineLoop *L, MachineBasicBlock *BB, SmallVectorImpl<MachineInstr *> &Candidates) { for (auto &MI : *BB) { LLVM_DEBUG(dbgs() << "LoopSink: Analysing candidate: " << MI); if (!TII->shouldSink(MI)) { LLVM_DEBUG(dbgs() << "LoopSink: Instruction not a candidate for this " "target\n"); continue; } if (!L->isLoopInvariant(MI)) { LLVM_DEBUG(dbgs() << "LoopSink: Instruction is not loop invariant\n"); continue; } bool DontMoveAcrossStore = true; if (!MI.isSafeToMove(AA, DontMoveAcrossStore)) { LLVM_DEBUG(dbgs() << "LoopSink: Instruction not safe to move.\n"); continue; } if (MI.mayLoad() && !mayLoadFromGOTOrConstantPool(MI)) { LLVM_DEBUG(dbgs() << "LoopSink: Dont sink GOT or constant pool loads\n"); continue; } if (MI.isConvergent()) continue; const MachineOperand &MO = MI.getOperand(0); if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue; if (!MRI->hasOneDef(MO.getReg())) continue; LLVM_DEBUG(dbgs() << "LoopSink: Instruction added as candidate.\n"); Candidates.push_back(&MI); } } bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; LLVM_DEBUG(dbgs() << "******** Machine Sinking ********\n"); TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); DT = &getAnalysis<MachineDominatorTree>(); PDT = &getAnalysis<MachinePostDominatorTree>(); LI = &getAnalysis<MachineLoopInfo>(); MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr; MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); RegClassInfo.runOnMachineFunction(MF); + // MachineSink currently uses MachineLoopInfo, which only recognizes natural + // loops. As such, we could sink instructions into irreducible cycles, which + // would be non-profitable. + // WARNING: The current implementation of hasStoreBetween() is incorrect for + // sinking into irreducible cycles (PR53990); this bailout is currently + // necessary for correctness, not just profitability.
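+ // Illustrative example (ours, not part of this patch): a cycle entered at + // two blocks, e.g. edges A->B, A->C, B->C, C->B, is irreducible: + // MachineLoopInfo finds no natural loop there, so without this bailout the + // sinking heuristics below would treat its blocks like straight-line code.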
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *LI)) + return false; + bool EverMadeChange = false; while (true) { bool MadeChange = false; // Process all basic blocks. CEBCandidates.clear(); ToSplit.clear(); for (auto &MBB: MF) MadeChange |= ProcessBlock(MBB); // If we have anything we marked as toSplit, split it now. for (auto &Pair : ToSplit) { auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, *this); if (NewSucc != nullptr) { LLVM_DEBUG(dbgs() << " *** Splitting critical edge: " << printMBBReference(*Pair.first) << " -- " << printMBBReference(*NewSucc) << " -- " << printMBBReference(*Pair.second) << '\n'); if (MBFI) MBFI->onEdgeSplit(*Pair.first, *NewSucc, *MBPI); MadeChange = true; ++NumSplit; } else LLVM_DEBUG(dbgs() << " *** Not legal to break critical edge\n"); } // If this iteration over the code changed anything, keep iterating. if (!MadeChange) break; EverMadeChange = true; } if (SinkInstsIntoLoop) { SmallVector<MachineLoop *, 8> Loops(LI->begin(), LI->end()); for (auto *L : Loops) { MachineBasicBlock *Preheader = LI->findLoopPreheader(L); if (!Preheader) { LLVM_DEBUG(dbgs() << "LoopSink: Can't find preheader\n"); continue; } SmallVector<MachineInstr *, 8> Candidates; FindLoopSinkCandidates(L, Preheader, Candidates); // Walk the candidates in reverse order so that we start with the use // of a def-use chain, if there is any. // TODO: Sort the candidates using a cost-model. unsigned i = 0; for (MachineInstr *I : llvm::reverse(Candidates)) { if (i++ == SinkIntoLoopLimit) { LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to " "be analysed."); break; } if (!SinkIntoLoop(L, *I)) break; EverMadeChange = true; ++NumLoopSunk; } } } HasStoreCache.clear(); StoreInstrCache.clear(); // Now clear any kill flags for recorded registers. for (auto I : RegsToClearKillFlags) MRI->clearKillFlags(I); RegsToClearKillFlags.clear(); return EverMadeChange; } bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { // Can't sink anything out of a block that has fewer than two successors. if (MBB.succ_size() <= 1 || MBB.empty()) return false; // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an // unreachable loop there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; // Cache all successors, sorted by frequency info and loop depth. AllSuccsCache AllSuccessors; // Walk the basic block bottom-up. Remember if we saw a store. MachineBasicBlock::iterator I = MBB.end(); --I; bool ProcessedBegin, SawStore = false; do { MachineInstr &MI = *I; // The instruction to sink. // Predecrement I (if it's not begin) so that it isn't invalidated by // sinking. ProcessedBegin = I == MBB.begin(); if (!ProcessedBegin) --I; if (MI.isDebugOrPseudoInstr()) { if (MI.isDebugValue()) ProcessDbgInst(MI); continue; } bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); if (Joined) { MadeChange = true; continue; } if (SinkInstruction(MI, SawStore, AllSuccessors)) { ++NumSunk; MadeChange = true; } // If we just processed the first instruction in the block, we're done. } while (!ProcessedBegin); SeenDbgUsers.clear(); SeenDbgVars.clear(); // Recalculate the BB register pressure after sinking one BB. CachedRegisterPressure.clear(); return MadeChange; } void MachineSinking::ProcessDbgInst(MachineInstr &MI) { // When we see DBG_VALUEs for registers, record any vreg it reads, so that // we know what to sink if the vreg def sinks.
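// Illustrative MIR sketch (ours, not from this patch): given // %5 = ADD32rr %1, %2 // DBG_VALUE %5, $noreg, !"x", !DIExpression() // sinking the ADD must carry the DBG_VALUE along (or set it undef) so the // debugger never reports a stale value for "x".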
assert(MI.isDebugValue() && "Expected DBG_VALUE for processing"); DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(), MI.getDebugLoc()->getInlinedAt()); bool SeenBefore = SeenDbgVars.contains(Var); for (MachineOperand &MO : MI.debug_operands()) { if (MO.isReg() && MO.getReg().isVirtual()) SeenDbgUsers[MO.getReg()].push_back(SeenDbgUser(&MI, SeenBefore)); } // Record the variable for any DBG_VALUE, to avoid re-ordering any of them. SeenDbgVars.insert(Var); } bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To) { // FIXME: Need much better heuristics. // If the pass has already considered breaking this edge (during this pass // through the function), then let's go ahead and break it. This means // sinking multiple "cheap" instructions into the same block. if (!CEBCandidates.insert(std::make_pair(From, To)).second) return true; if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI)) return true; if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <= BranchProbability(SplitEdgeProbabilityThreshold, 100)) return true; // MI is cheap, we probably don't want to break the critical edge for it. // However, if this would allow some definitions of its source operands // to be sunk then it's probably worth it. for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isUse()) continue; Register Reg = MO.getReg(); if (Reg == 0) continue; // We don't move live definitions of physical registers, // so sinking their uses won't enable any opportunities. if (Register::isPhysicalRegister(Reg)) continue; // If this instruction is the only user of a virtual register, // check if breaking the edge will enable sinking // both this instruction and the defining instruction. if (MRI->hasOneNonDBGUse(Reg)) { // If the definition resides in same MBB, // claim it's likely we can sink these together. // If definition resides elsewhere, we aren't // blocking it from being sunk so don't break the edge. MachineInstr *DefMI = MRI->getVRegDef(Reg); if (DefMI->getParent() == MI.getParent()) return true; } } return false; } bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, MachineBasicBlock *FromBB, MachineBasicBlock *ToBB, bool BreakPHIEdge) { if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) return false; // Avoid breaking back edge. From == To means backedge for single BB loop. if (!SplitEdges || FromBB == ToBB) return false; // Check for backedges of more "complex" loops. if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && LI->isLoopHeader(ToBB)) return false; // It's not always legal to break critical edges and sink the computation // to the edge. // // %bb.1: // v1024 // Beq %bb.3 // // %bb.2: // ... no uses of v1024 // // %bb.3: // ... // = v1024 // // If %bb.1 -> %bb.3 edge is broken and computation of v1024 is inserted: // // %bb.1: // ... // Bne %bb.2 // %bb.4: // v1024 = // B %bb.3 // %bb.2: // ... no uses of v1024 // // %bb.3: // ... // = v1024 // // This is incorrect since v1024 is not computed along the %bb.1->%bb.2->%bb.3 // flow. We need to ensure the new basic block where the computation is // sunk to dominates all the uses. // It's only legal to break critical edge and sink the computation to the // new block if all the predecessors of "To", except for "From", are // not dominated by "From". Given SSA property, this means these // predecessors are dominated by "To". // // There is no need to do this check if all the uses are PHI nodes. 
PHI // sources are only defined on the specific predecessor edges. if (!BreakPHIEdge) { for (MachineBasicBlock *Pred : ToBB->predecessors()) if (Pred != FromBB && !DT->dominates(ToBB, Pred)) return false; } ToSplit.insert(std::make_pair(FromBB, ToBB)); return true; } std::vector<unsigned> & MachineSinking::getBBRegisterPressure(MachineBasicBlock &MBB) { // Currently to save compiling time, MBB's register pressure will not change // in one ProcessBlock iteration because of CachedRegisterPressure. But MBB's // register pressure is changed after sinking any instructions into it. // FIXME: need an accurate and cheap register pressure estimate model here. auto RP = CachedRegisterPressure.find(&MBB); if (RP != CachedRegisterPressure.end()) return RP->second; RegionPressure Pressure; RegPressureTracker RPTracker(Pressure); // Initialize the register pressure tracker. RPTracker.init(MBB.getParent(), &RegClassInfo, nullptr, &MBB, MBB.end(), /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); for (MachineBasicBlock::iterator MII = MBB.instr_end(), MIE = MBB.instr_begin(); MII != MIE; --MII) { MachineInstr &MI = *std::prev(MII); if (MI.isDebugInstr() || MI.isPseudoProbe()) continue; RegisterOperands RegOpers; RegOpers.collect(MI, *TRI, *MRI, false, false); RPTracker.recedeSkipDebugValues(); assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!"); RPTracker.recede(RegOpers); } RPTracker.closeRegion(); auto It = CachedRegisterPressure.insert( std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure)); return It.first->second; } /// isProfitableToSinkTo - Return true if it is profitable to sink MI. bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *SuccToSinkTo, AllSuccsCache &AllSuccessors) { assert (SuccToSinkTo && "Invalid SinkTo Candidate BB"); if (MBB == SuccToSinkTo) return false; // It is profitable if SuccToSinkTo does not post dominate current block. if (!PDT->dominates(SuccToSinkTo, MBB)) return true; // It is profitable to sink an instruction from a deeper loop to a shallower // loop, even if the latter post-dominates the former (PR21115). if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo)) return true; // Check if only use in post dominated block is PHI instruction. bool NonPHIUse = false; for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) { MachineBasicBlock *UseBlock = UseInst.getParent(); if (UseBlock == SuccToSinkTo && !UseInst.isPHI()) NonPHIUse = true; } if (!NonPHIUse) return true; // If SuccToSinkTo post-dominates MBB, it may still be profitable if MI // can be sunk further profitably into another block in the next round. bool BreakPHIEdge = false; // FIXME - If finding successor is compile time expensive then cache results. if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors)) return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors); MachineLoop *ML = LI->getLoopFor(MBB); // If the instruction is not inside a loop, it is not profitable to sink MI to // a post dominate block SuccToSinkTo. if (!ML) return false; auto isRegisterPressureSetExceedLimit = [&](const TargetRegisterClass *RC) { unsigned Weight = TRI->getRegClassWeight(RC).RegWeight; const int *PS = TRI->getRegClassPressureSets(RC); // Get register pressure for block SuccToSinkTo. std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(*SuccToSinkTo); for (; *PS != -1; PS++) // check if any register pressure set exceeds limit in block SuccToSinkTo // after sinking.
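// Worked example (illustrative, ours): with RegWeight = 1, a cached max // pressure of 31 for this set in SuccToSinkTo, and a set limit of 32, we get // 1 + 31 >= 32, so the sink is rejected as unprofitable below.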
if (Weight + BBRegisterPressure[*PS] >= TRI->getRegPressureSetLimit(*MBB->getParent(), *PS)) return true; return false; }; // If this instruction is inside a loop and sinking it can shorten the live // ranges of more registers, it is still profitable. for (const MachineOperand &MO : MI.operands()) { // Ignore non-register operands. if (!MO.isReg()) continue; Register Reg = MO.getReg(); if (Reg == 0) continue; if (Register::isPhysicalRegister(Reg)) { if (MO.isUse() && (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO))) continue; // Don't handle non-constant and non-ignorable physical register. return false; } // Users for the defs are all dominated by SuccToSinkTo. if (MO.isDef()) { // This def register's live range is shortened after sinking. bool LocalUse = false; if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, LocalUse)) return false; } else { MachineInstr *DefMI = MRI->getVRegDef(Reg); // DefMI is defined outside of the loop. There should be no live range // impact for this operand. Definition outside of the loop means: // 1: the definition is outside of the loop. // 2: the definition is in this loop, but it is a PHI in the loop header. if (LI->getLoopFor(DefMI->getParent()) != ML || (DefMI->isPHI() && LI->isLoopHeader(DefMI->getParent()))) continue; // The DefMI is defined inside the loop. // If sinking this operand makes some register pressure set exceed limit, // it is not profitable. if (isRegisterPressureSetExceedLimit(MRI->getRegClass(Reg))) { LLVM_DEBUG(dbgs() << "register pressure exceed limit, not profitable."); return false; } } } // If MI is in a loop and all its operands are alive across the whole loop, // or if no operand sinking makes any register pressure set exceed its limit, // it is profitable to sink MI. return true; } /// Get the sorted sequence of successors for this MachineBasicBlock, possibly /// computing it if it was not already cached. SmallVector<MachineBasicBlock *, 4> & MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) const { // Do we have the sorted successors in cache ? auto Succs = AllSuccessors.find(MBB); if (Succs != AllSuccessors.end()) return Succs->second; SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->successors()); // Handle cases where sinking can happen but where the sink point isn't a // successor. For example: // // x = computation // if () {} else {} // use x // for (MachineDomTreeNode *DTChild : DT->getNode(MBB)->children()) { // DomTree children of MBB that have MBB as immediate dominator are added. if (DTChild->getIDom()->getBlock() == MI.getParent() && // Skip MBBs already added to the AllSuccs vector above. !MBB->isSuccessor(DTChild->getBlock())) AllSuccs.push_back(DTChild->getBlock()); } // Sort Successors according to their loop depth or block frequency info. llvm::stable_sort( AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0; uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; return HasBlockFreq ? LHSFreq < RHSFreq : LI->getLoopDepth(L) < LI->getLoopDepth(R); }); auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs)); return it.first->second; } /// FindSuccToSinkTo - Find a successor to sink this instruction to.
MachineBasicBlock * MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB, bool &BreakPHIEdge, AllSuccsCache &AllSuccessors) { assert (MBB && "Invalid MachineBasicBlock!"); // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. MachineBasicBlock *SuccToSinkTo = nullptr; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; // Ignore non-register operands. Register Reg = MO.getReg(); if (Reg == 0) continue; if (Register::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register // and we can freely move its uses. Alternatively, if it's allocatable, // it could get allocated to something with a def during allocation. if (!MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO)) return nullptr; } else if (!MO.isDead()) { // A def that isn't dead. We can't move it. return nullptr; } } else { // Virtual register uses are always safe to sink. if (MO.isUse()) continue; // If it's not safe to move defs of the register class, then abort. if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) return nullptr; // Virtual register defs can only be sunk if all their uses are in blocks // dominated by one of the successors. if (SuccToSinkTo) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. bool LocalUse = false; if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, BreakPHIEdge, LocalUse)) return nullptr; continue; } // Otherwise, we should look at all the successors and decide which one // we should sink to. If we have reliable block frequency information // (frequency != 0) available, give successors with smaller frequencies // higher priority, otherwise prioritize smaller loop depths. for (MachineBasicBlock *SuccBlock : GetAllSortedSuccessors(MI, MBB, AllSuccessors)) { bool LocalUse = false; if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, BreakPHIEdge, LocalUse)) { SuccToSinkTo = SuccBlock; break; } if (LocalUse) // Def is used locally, it's never safe to move this def. return nullptr; } // If we couldn't find a block to sink to, ignore this instruction. if (!SuccToSinkTo) return nullptr; if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors)) return nullptr; } } // It is not possible to sink an instruction into its own block. This can // happen with loops. if (MBB == SuccToSinkTo) return nullptr; // It's not safe to sink instructions to EH landing pad. Control flow into // landing pad is implicitly defined. if (SuccToSinkTo && SuccToSinkTo->isEHPad()) return nullptr; // It ought to be okay to sink instructions into an INLINEASM_BR target, but // only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in // the source block (which this code does not yet do). So for now, forbid // doing so. if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget()) return nullptr; return SuccToSinkTo; } /// Return true if MI is likely to be usable as a memory operation by the /// implicit null check optimization. /// /// This is a "best effort" heuristic, and should not be relied upon for /// correctness. This returning true does not guarantee that the implicit null /// check optimization is legal over MI, and this returning false does not /// guarantee MI cannot possibly be used to do a null check. 
static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { using MachineBranchPredicate = TargetInstrInfo::MachineBranchPredicate; auto *MBB = MI.getParent(); if (MBB->pred_size() != 1) return false; auto *PredMBB = *MBB->pred_begin(); auto *PredBB = PredMBB->getBasicBlock(); // Frontends that don't use implicit null checks have no reason to emit // branches with make.implicit metadata, and this function should always // return false for them. if (!PredBB || !PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit)) return false; const MachineOperand *BaseOp; int64_t Offset; bool OffsetIsScalable; if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI)) return false; if (!BaseOp->isReg()) return false; if (!(MI.mayLoad() && !MI.isPredicable())) return false; MachineBranchPredicate MBP; if (TII->analyzeBranchPredicate(*PredMBB, MBP, false)) return false; return MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 && (MBP.Predicate == MachineBranchPredicate::PRED_NE || MBP.Predicate == MachineBranchPredicate::PRED_EQ) && MBP.LHS.getReg() == BaseOp->getReg(); } /// If the sunk instruction is a copy, try to forward the copy instead of /// leaving an 'undef' DBG_VALUE in the original location. Don't do this if /// there's any subregister weirdness involved. Returns true if copy /// propagation occurred. static bool attemptDebugCopyProp(MachineInstr &SinkInst, MachineInstr &DbgMI, Register Reg) { const MachineRegisterInfo &MRI = SinkInst.getMF()->getRegInfo(); const TargetInstrInfo &TII = *SinkInst.getMF()->getSubtarget().getInstrInfo(); // Copy DBG_VALUE operand and set the original to undef. We then check to // see whether this is something that can be copy-forwarded. If it isn't, // continue around the loop. const MachineOperand *SrcMO = nullptr, *DstMO = nullptr; auto CopyOperands = TII.isCopyInstr(SinkInst); if (!CopyOperands) return false; SrcMO = CopyOperands->Source; DstMO = CopyOperands->Destination; // Check validity of forwarding this copy. bool PostRA = MRI.getNumVirtRegs() == 0; // Trying to forward between physical and virtual registers is too hard. if (Reg.isVirtual() != SrcMO->getReg().isVirtual()) return false; // Only try virtual register copy-forwarding before regalloc, and physical // register copy-forwarding after regalloc. bool arePhysRegs = !Reg.isVirtual(); if (arePhysRegs != PostRA) return false; // Pre-regalloc, only forward if all subregisters agree (or there are no // subregs at all). More analysis might recover some forwardable copies. if (!PostRA) for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) if (DbgMO.getSubReg() != SrcMO->getSubReg() || DbgMO.getSubReg() != DstMO->getSubReg()) return false; // Post-regalloc, we may be sinking a DBG_VALUE of a sub or super-register // of this copy. Only forward the copy if the DBG_VALUE operand exactly // matches the copy destination. if (PostRA && Reg != DstMO->getReg()) return false; for (auto &DbgMO : DbgMI.getDebugOperandsForReg(Reg)) { DbgMO.setReg(SrcMO->getReg()); DbgMO.setSubReg(SrcMO->getSubReg()); } return true; } using MIRegs = std::pair>; /// Sink an instruction and its associated debug instructions. 
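/// [Editor's note] Illustrative before/after, in hypothetical MIR (not from
/// a real test): starting from
///   bb.0:  %1 = COPY %0
///          DBG_VALUE %1, ...
/// sinking the COPY into bb.1 clones the DBG_VALUE at the insert point, and
/// the instance left in bb.0 is either forwarded to the copy source
/// (becoming DBG_VALUE %0, via attemptDebugCopyProp above) or set to undef.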
static void performSink(MachineInstr &MI, MachineBasicBlock &SuccToSinkTo,
                        MachineBasicBlock::iterator InsertPos,
                        SmallVectorImpl<MIRegs> &DbgValuesToSink) {
  // If we cannot find a location to use (merge with), then we erase the debug
  // location to prevent debug-info driven tools from potentially reporting
  // wrong location information.
  if (!SuccToSinkTo.empty() && InsertPos != SuccToSinkTo.end())
    MI.setDebugLoc(DILocation::getMergedLocation(MI.getDebugLoc(),
                                                 InsertPos->getDebugLoc()));
  else
    MI.setDebugLoc(DebugLoc());

  // Move the instruction.
  MachineBasicBlock *ParentBlock = MI.getParent();
  SuccToSinkTo.splice(InsertPos, ParentBlock, MI,
                      ++MachineBasicBlock::iterator(MI));

  // Sink a copy of debug users to the insert position. Mark the original
  // DBG_VALUE location as 'undef', indicating that any earlier variable
  // location should be terminated as we've optimised away the value at this
  // point.
  for (auto DbgValueToSink : DbgValuesToSink) {
    MachineInstr *DbgMI = DbgValueToSink.first;
    MachineInstr *NewDbgMI = DbgMI->getMF()->CloneMachineInstr(DbgMI);
    SuccToSinkTo.insert(InsertPos, NewDbgMI);

    bool PropagatedAllSunkOps = true;
    for (unsigned Reg : DbgValueToSink.second) {
      if (DbgMI->hasDebugOperandForReg(Reg)) {
        if (!attemptDebugCopyProp(MI, *DbgMI, Reg)) {
          PropagatedAllSunkOps = false;
          break;
        }
      }
    }
    if (!PropagatedAllSunkOps)
      DbgMI->setDebugValueUndef();
  }
}

/// hasStoreBetween - Check if there is a store between straight-line blocks
/// From and To.
bool MachineSinking::hasStoreBetween(MachineBasicBlock *From,
                                     MachineBasicBlock *To, MachineInstr &MI) {
  // Make sure From and To are in a straight line, which means From dominates
  // To and To post-dominates From.
  if (!DT->dominates(From, To) || !PDT->dominates(To, From))
    return true;

  auto BlockPair = std::make_pair(From, To);

  // Has this block pair been queried before, with a definite cached result?
  if (HasStoreCache.find(BlockPair) != HasStoreCache.end())
    return HasStoreCache[BlockPair];

  if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end())
    return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) {
      return I->mayAlias(AA, MI, false);
    });

  bool SawStore = false;
  bool HasAliasedStore = false;
  DenseSet<MachineBasicBlock *> HandledBlocks;
  DenseSet<MachineBasicBlock *> HandledDomBlocks;
  // Go through all reachable blocks from From.
  for (MachineBasicBlock *BB : depth_first(From)) {
    // We insert the instruction at the start of block To, so no need to worry
    // about stores inside To.
    // Stores in block From have already been considered on entry to
    // SinkInstruction.
    if (BB == To || BB == From)
      continue;

    // We already handled this BB in a previous iteration.
    if (HandledBlocks.count(BB))
      continue;

    HandledBlocks.insert(BB);
    // To post-dominates BB, so BB must be on a path from block From.
    if (PDT->dominates(To, BB)) {
      if (!HandledDomBlocks.count(BB))
        HandledDomBlocks.insert(BB);

      // If this BB is too big, or the number of blocks on the straight line
      // between From and To is too big, stop searching to save compile time.
      if (BB->size() > SinkLoadInstsPerBlockThreshold ||
          HandledDomBlocks.size() > SinkLoadBlocksThreshold) {
        for (auto *DomBB : HandledDomBlocks) {
          if (DomBB != BB && DT->dominates(DomBB, BB))
            HasStoreCache[std::make_pair(DomBB, To)] = true;
          else if (DomBB != BB && DT->dominates(BB, DomBB))
            HasStoreCache[std::make_pair(From, DomBB)] = true;
        }
        HasStoreCache[BlockPair] = true;
        return true;
      }

      for (MachineInstr &I : *BB) {
        // Treat as alias conservatively for a call or an ordered memory
        // operation.
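        // [Editor's note] Assumed straight-line shape for this walk:
        // From -> A -> B -> To, where From dominates To and To post-dominates
        // From. Any may-store in A or B that aliases MI (a load) blocks the
        // sink; the stores found are cached per (From, To) pair so that later
        // queries stay cheap.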
if (I.isCall() || I.hasOrderedMemoryRef()) { for (auto *DomBB : HandledDomBlocks) { if (DomBB != BB && DT->dominates(DomBB, BB)) HasStoreCache[std::make_pair(DomBB, To)] = true; else if(DomBB != BB && DT->dominates(BB, DomBB)) HasStoreCache[std::make_pair(From, DomBB)] = true; } HasStoreCache[BlockPair] = true; return true; } if (I.mayStore()) { SawStore = true; // We still have chance to sink MI if all stores between are not // aliased to MI. // Cache all store instructions, so that we don't need to go through // all From reachable blocks for next load instruction. if (I.mayAlias(AA, MI, false)) HasAliasedStore = true; StoreInstrCache[BlockPair].push_back(&I); } } } } // If there is no store at all, cache the result. if (!SawStore) HasStoreCache[BlockPair] = false; return HasAliasedStore; } /// Sink instructions into loops if profitable. This especially tries to prevent /// register spills caused by register pressure if there is little to no /// overhead moving instructions into loops. bool MachineSinking::SinkIntoLoop(MachineLoop *L, MachineInstr &I) { LLVM_DEBUG(dbgs() << "LoopSink: Finding sink block for: " << I); MachineBasicBlock *Preheader = L->getLoopPreheader(); assert(Preheader && "Loop sink needs a preheader block"); MachineBasicBlock *SinkBlock = nullptr; bool CanSink = true; const MachineOperand &MO = I.getOperand(0); for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { LLVM_DEBUG(dbgs() << "LoopSink: Analysing use: " << MI); if (!L->contains(&MI)) { LLVM_DEBUG(dbgs() << "LoopSink: Use not in loop, can't sink.\n"); CanSink = false; break; } // FIXME: Come up with a proper cost model that estimates whether sinking // the instruction (and thus possibly executing it on every loop // iteration) is more expensive than a register. // For now assumes that copies are cheap and thus almost always worth it. if (!MI.isCopy()) { LLVM_DEBUG(dbgs() << "LoopSink: Use is not a copy\n"); CanSink = false; break; } if (!SinkBlock) { SinkBlock = MI.getParent(); LLVM_DEBUG(dbgs() << "LoopSink: Setting sink block to: " << printMBBReference(*SinkBlock) << "\n"); continue; } SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); if (!SinkBlock) { LLVM_DEBUG(dbgs() << "LoopSink: Can't find nearest dominator\n"); CanSink = false; break; } LLVM_DEBUG(dbgs() << "LoopSink: Setting nearest common dom block: " << printMBBReference(*SinkBlock) << "\n"); } if (!CanSink) { LLVM_DEBUG(dbgs() << "LoopSink: Can't sink instruction.\n"); return false; } if (!SinkBlock) { LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, can't find sink block.\n"); return false; } if (SinkBlock == Preheader) { LLVM_DEBUG(dbgs() << "LoopSink: Not sinking, sink block is the preheader\n"); return false; } if (SinkBlock->size() > SinkLoadInstsPerBlockThreshold) { LLVM_DEBUG(dbgs() << "LoopSink: Not Sinking, block too large to analyse.\n"); return false; } LLVM_DEBUG(dbgs() << "LoopSink: Sinking instruction!\n"); SinkBlock->splice(SinkBlock->getFirstNonPHI(), Preheader, I); // The instruction is moved from its basic block, so do not retain the // debug information. assert(!I.isDebugInstr() && "Should not sink debug inst"); I.setDebugLoc(DebugLoc()); return true; } /// SinkInstruction - Determine whether it is safe to sink the specified machine /// instruction out of its current block into a successor. bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, AllSuccsCache &AllSuccessors) { // Don't sink instructions that the target prefers not to sink. 
if (!TII->shouldSink(MI)) return false; // Check if it's safe to move the instruction. if (!MI.isSafeToMove(AA, SawStore)) return false; // Convergent operations may not be made control-dependent on additional // values. if (MI.isConvergent()) return false; // Don't break implicit null checks. This is a performance heuristic, and not // required for correctness. if (SinkingPreventsImplicitNullCheck(MI, TII, TRI)) return false; // FIXME: This should include support for sinking instructions within the // block they are currently in to shorten the live ranges. We often get // instructions sunk into the top of a large block, but it would be better to // also sink them down before their first use in the block. This xform has to // be careful not to *increase* register pressure though, e.g. sinking // "x = y + z" down if it kills y and z would increase the live ranges of y // and z and only shrink the live range of x. bool BreakPHIEdge = false; MachineBasicBlock *ParentBlock = MI.getParent(); MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors); // If there are no outputs, it must have side-effects. if (!SuccToSinkTo) return false; // If the instruction to move defines a dead physical register which is live // when leaving the basic block, don't move it because it could turn into a // "zombie" define of that preg. E.g., EFLAGS. () for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || MO.isUse()) continue; Register Reg = MO.getReg(); if (Reg == 0 || !Register::isPhysicalRegister(Reg)) continue; if (SuccToSinkTo->isLiveIn(Reg)) return false; } LLVM_DEBUG(dbgs() << "Sink instr " << MI << "\tinto block " << *SuccToSinkTo); // If the block has multiple predecessors, this is a critical edge. // Decide if we can sink along it or need to break the edge. if (SuccToSinkTo->pred_size() > 1) { // We cannot sink a load across a critical edge - there may be stores in // other code paths. bool TryBreak = false; bool Store = MI.mayLoad() ? hasStoreBetween(ParentBlock, SuccToSinkTo, MI) : true; if (!MI.isSafeToMove(AA, Store)) { LLVM_DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); TryBreak = true; } // We don't want to sink across a critical edge if we don't dominate the // successor. We could be introducing calculations to new code paths. if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) { LLVM_DEBUG(dbgs() << " *** NOTE: Critical edge found\n"); TryBreak = true; } // Don't sink instructions into a loop. if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { LLVM_DEBUG(dbgs() << " *** NOTE: Loop header found\n"); TryBreak = true; } // Otherwise we are OK with sinking along a critical edge. if (!TryBreak) LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n"); else { // Mark this edge as to be split. // If the edge can actually be split, the next iteration of the main loop // will sink MI in the newly created block. bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!Status) LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " "break critical edge\n"); // The instruction will not be sunk this time. return false; } } if (BreakPHIEdge) { // BreakPHIEdge is true if all the uses are in the successor MBB being // sunken into and they are all PHI nodes. In this case, machine-sink must // break the critical edge first. 
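    // [Editor's note] Sketch of the split requested below, on an assumed CFG:
    // a critical edge ParentBlock -> SuccToSinkTo (ParentBlock has another
    // successor, SuccToSinkTo another predecessor) becomes
    // ParentBlock -> NewBB -> SuccToSinkTo, and the next iteration of the
    // main loop can then sink MI into NewBB.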
bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); if (!Status) LLVM_DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " "break critical edge\n"); // The instruction will not be sunk this time. return false; } // Determine where to insert into. Skip phi nodes. MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; // Collect debug users of any vreg that this inst defines. SmallVector DbgUsersToSink; for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) continue; if (!SeenDbgUsers.count(MO.getReg())) continue; // Sink any users that don't pass any other DBG_VALUEs for this variable. auto &Users = SeenDbgUsers[MO.getReg()]; for (auto &User : Users) { MachineInstr *DbgMI = User.getPointer(); if (User.getInt()) { // This DBG_VALUE would re-order assignments. If we can't copy-propagate // it, it can't be recovered. Set it undef. if (!attemptDebugCopyProp(MI, *DbgMI, MO.getReg())) DbgMI->setDebugValueUndef(); } else { DbgUsersToSink.push_back( {DbgMI, SmallVector(1, MO.getReg())}); } } } // After sinking, some debug users may not be dominated any more. If possible, // copy-propagate their operands. As it's expensive, don't do this if there's // no debuginfo in the program. if (MI.getMF()->getFunction().getSubprogram() && MI.isCopy()) SalvageUnsunkDebugUsersOfCopy(MI, SuccToSinkTo); performSink(MI, *SuccToSinkTo, InsertPos, DbgUsersToSink); // Conservatively, clear any kill flags, since it's possible that they are no // longer correct. // Note that we have to clear the kill flags for any register this instruction // uses as we may sink over another instruction which currently kills the // used registers. for (MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.isUse()) RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags. } return true; } void MachineSinking::SalvageUnsunkDebugUsersOfCopy( MachineInstr &MI, MachineBasicBlock *TargetBlock) { assert(MI.isCopy()); assert(MI.getOperand(1).isReg()); // Enumerate all users of vreg operands that are def'd. Skip those that will // be sunk. For the rest, if they are not dominated by the block we will sink // MI into, propagate the copy source to them. SmallVector DbgDefUsers; SmallVector DbgUseRegs; const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); for (auto &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual()) continue; DbgUseRegs.push_back(MO.getReg()); for (auto &User : MRI.use_instructions(MO.getReg())) { if (!User.isDebugValue() || DT->dominates(TargetBlock, User.getParent())) continue; // If is in same block, will either sink or be use-before-def. if (User.getParent() == MI.getParent()) continue; assert(User.hasDebugOperandForReg(MO.getReg()) && "DBG_VALUE user of vreg, but has no operand for it?"); DbgDefUsers.push_back(&User); } } // Point the users of this copy that are no longer dominated, at the source // of the copy. for (auto *User : DbgDefUsers) { for (auto &Reg : DbgUseRegs) { for (auto &DbgOp : User->getDebugOperandsForReg(Reg)) { DbgOp.setReg(MI.getOperand(1).getReg()); DbgOp.setSubReg(MI.getOperand(1).getSubReg()); } } } } //===----------------------------------------------------------------------===// // This pass is not intended to be a replacement or a complete alternative // for the pre-ra machine sink pass. It is only designed to sink COPY // instructions which should be handled after RA. 
//
// This pass sinks COPY instructions into a successor block, if the COPY is not
// used in the current block and the COPY is live-in to a single successor
// (i.e., doesn't require the COPY to be duplicated). This avoids executing the
// copy on paths where its result isn't needed. This also exposes additional
// opportunities for dead copy elimination and shrink wrapping.
//
// These copies were either not handled by or are inserted after the
// MachineSink pass. As an example of the former case, the MachineSink pass
// cannot sink COPY instructions with allocatable source registers; for AArch64
// these types of copy instructions are frequently used to move function
// parameters (PhyReg) into virtual registers in the entry block.
//
// For the machine IR below, this pass will sink %w19 in the entry block into
// its successor (%bb.1) because %w19 is only live-in in %bb.1.
// %bb.0:
//    %wzr = SUBSWri %w1, 1
//    %w19 = COPY %w0
//    Bcc 11, %bb.2
// %bb.1:
//    Live Ins: %w19
//    BL @fun
//    %w0 = ADDWrr %w0, %w19
//    RET %w0
// %bb.2:
//    %w0 = COPY %wzr
//    RET %w0
// As we sink %w19 (CSR in AArch64) into %bb.1, the shrink-wrapping pass will
// be able to see %bb.0 as a candidate.
//===----------------------------------------------------------------------===//

namespace {

class PostRAMachineSinking : public MachineFunctionPass {
public:
  bool runOnMachineFunction(MachineFunction &MF) override;

  static char ID;
  PostRAMachineSinking() : MachineFunctionPass(ID) {}
  StringRef getPassName() const override { return "PostRA Machine Sink"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

private:
  /// Track which register units have been modified and used.
  LiveRegUnits ModifiedRegUnits, UsedRegUnits;

  /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
  /// entry in this map for each unit it touches. The DBG_VALUE's entry
  /// consists of a pointer to the instruction itself, and a vector of
  /// registers referred to by the instruction that overlap the key register
  /// unit.
  DenseMap<unsigned, SmallVector<MIRegs, 2>> SeenDbgInstrs;

  /// Sink Copy instructions unused in the same block close to their uses in
  /// successors.
  bool tryToSinkCopy(MachineBasicBlock &BB, MachineFunction &MF,
                     const TargetRegisterInfo *TRI, const TargetInstrInfo *TII);
};
} // namespace

char PostRAMachineSinking::ID = 0;
char &llvm::PostRAMachineSinkingID = PostRAMachineSinking::ID;

INITIALIZE_PASS(PostRAMachineSinking, "postra-machine-sink",
                "PostRA Machine Sink", false, false)

static bool aliasWithRegsInLiveIn(MachineBasicBlock &MBB, unsigned Reg,
                                  const TargetRegisterInfo *TRI) {
  LiveRegUnits LiveInRegUnits(*TRI);
  LiveInRegUnits.addLiveIns(MBB);
  return !LiveInRegUnits.available(Reg);
}

static MachineBasicBlock *
getSingleLiveInSuccBB(MachineBasicBlock &CurBB,
                      const SmallPtrSetImpl<MachineBasicBlock *> &SinkableBBs,
                      unsigned Reg, const TargetRegisterInfo *TRI) {
  // Try to find a single sinkable successor in which Reg is live-in.
  MachineBasicBlock *BB = nullptr;
  for (auto *SI : SinkableBBs) {
    if (aliasWithRegsInLiveIn(*SI, Reg, TRI)) {
      // If BB is set here, Reg is live-in to at least two sinkable successors,
      // so quit.
      if (BB)
        return nullptr;
      BB = SI;
    }
  }
  // Reg is not live-in to any sinkable successors.
  if (!BB)
    return nullptr;

  // Check if any register aliased with Reg is live-in to other successors.
for (auto *SI : CurBB.successors()) { if (!SinkableBBs.count(SI) && aliasWithRegsInLiveIn(*SI, Reg, TRI)) return nullptr; } return BB; } static MachineBasicBlock * getSingleLiveInSuccBB(MachineBasicBlock &CurBB, const SmallPtrSetImpl &SinkableBBs, ArrayRef DefedRegsInCopy, const TargetRegisterInfo *TRI) { MachineBasicBlock *SingleBB = nullptr; for (auto DefReg : DefedRegsInCopy) { MachineBasicBlock *BB = getSingleLiveInSuccBB(CurBB, SinkableBBs, DefReg, TRI); if (!BB || (SingleBB && SingleBB != BB)) return nullptr; SingleBB = BB; } return SingleBB; } static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB, SmallVectorImpl &UsedOpsInCopy, LiveRegUnits &UsedRegUnits, const TargetRegisterInfo *TRI) { for (auto U : UsedOpsInCopy) { MachineOperand &MO = MI->getOperand(U); Register SrcReg = MO.getReg(); if (!UsedRegUnits.available(SrcReg)) { MachineBasicBlock::iterator NI = std::next(MI->getIterator()); for (MachineInstr &UI : make_range(NI, CurBB.end())) { if (UI.killsRegister(SrcReg, TRI)) { UI.clearRegisterKills(SrcReg, TRI); MO.setIsKill(true); break; } } } } } static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB, SmallVectorImpl &UsedOpsInCopy, SmallVectorImpl &DefedRegsInCopy) { MachineFunction &MF = *SuccBB->getParent(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); for (unsigned DefReg : DefedRegsInCopy) for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S) SuccBB->removeLiveIn(*S); for (auto U : UsedOpsInCopy) { Register SrcReg = MI->getOperand(U).getReg(); LaneBitmask Mask; for (MCRegUnitMaskIterator S(SrcReg, TRI); S.isValid(); ++S) { Mask |= (*S).second; } SuccBB->addLiveIn(SrcReg, Mask.any() ? Mask : LaneBitmask::getAll()); } SuccBB->sortUniqueLiveIns(); } static bool hasRegisterDependency(MachineInstr *MI, SmallVectorImpl &UsedOpsInCopy, SmallVectorImpl &DefedRegsInCopy, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits) { bool HasRegDependency = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; Register Reg = MO.getReg(); if (!Reg) continue; if (MO.isDef()) { if (!ModifiedRegUnits.available(Reg) || !UsedRegUnits.available(Reg)) { HasRegDependency = true; break; } DefedRegsInCopy.push_back(Reg); // FIXME: instead of isUse(), readsReg() would be a better fix here, // For example, we can ignore modifications in reg with undef. However, // it's not perfectly clear if skipping the internal read is safe in all // other targets. } else if (MO.isUse()) { if (!ModifiedRegUnits.available(Reg)) { HasRegDependency = true; break; } UsedOpsInCopy.push_back(i); } } return HasRegDependency; } static SmallSet getRegUnits(MCRegister Reg, const TargetRegisterInfo *TRI) { SmallSet RegUnits; for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI) RegUnits.insert(*RI); return RegUnits; } bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB, MachineFunction &MF, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII) { SmallPtrSet SinkableBBs; // FIXME: For now, we sink only to a successor which has a single predecessor // so that we can directly sink COPY instructions to the successor without // adding any new block or branch instruction. 
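  // [Editor's note] Example of the restriction above, with an assumed CFG: if
  // CurBB branches to S1 and S2, and S2 has a second predecessor elsewhere,
  // only S1 (whose single predecessor is CurBB) is a sink candidate; sinking
  // into S2 would need an edge split or a duplicated COPY, which this post-RA
  // pass deliberately avoids.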
  for (MachineBasicBlock *SI : CurBB.successors())
    if (!SI->livein_empty() && SI->pred_size() == 1)
      SinkableBBs.insert(SI);

  if (SinkableBBs.empty())
    return false;

  bool Changed = false;

  // Track which registers have been modified and used between the end of the
  // block and the current instruction.
  ModifiedRegUnits.clear();
  UsedRegUnits.clear();
  SeenDbgInstrs.clear();

  for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) {
    // Track the operand index for use in the Copy.
    SmallVector<unsigned, 2> UsedOpsInCopy;
    // Track the register number defined in the Copy.
    SmallVector<unsigned, 2> DefedRegsInCopy;

    // We must sink this DBG_VALUE if its operand is sunk. To avoid searching
    // for DBG_VALUEs later, record them when they're encountered.
    if (MI.isDebugValue()) {
      SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
      bool IsValid = true;
      for (MachineOperand &MO : MI.debug_operands()) {
        if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
          // Bail if we can already tell the sink would be rejected, rather
          // than needlessly accumulating lots of DBG_VALUEs.
          if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                                    ModifiedRegUnits, UsedRegUnits)) {
            IsValid = false;
            break;
          }

          // Record debug use of each reg unit.
          SmallSet<MCRegister, 4> RegUnits = getRegUnits(MO.getReg(), TRI);
          for (MCRegister Reg : RegUnits)
            MIUnits[Reg].push_back(MO.getReg());
        }
      }
      if (IsValid) {
        for (auto RegOps : MIUnits)
          SeenDbgInstrs[RegOps.first].push_back({&MI, RegOps.second});
      }
      continue;
    }

    if (MI.isDebugOrPseudoInstr())
      continue;

    // Do not move any instruction across a function call.
    if (MI.isCall())
      return false;

    if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }

    // Don't sink the COPY if it would violate a register dependency.
    if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
                              ModifiedRegUnits, UsedRegUnits)) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((!UsedOpsInCopy.empty() && !DefedRegsInCopy.empty()) &&
           "Unexpected SrcReg or DefReg");

    MachineBasicBlock *SuccBB =
        getSingleLiveInSuccBB(CurBB, SinkableBBs, DefedRegsInCopy, TRI);

    // Don't sink if we cannot find a single sinkable successor in which Reg
    // is live-in.
    if (!SuccBB) {
      LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
                                        TRI);
      continue;
    }
    assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
           "Unexpected predecessor");

    // Collect DBG_VALUEs that must sink with this copy. We've previously
    // recorded which reg units DBG_VALUEs read; if this instruction writes
    // any of those units, the corresponding DBG_VALUEs must sink.
    MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
    for (auto &MO : MI.operands()) {
      if (!MO.isReg() || !MO.isDef())
        continue;

      SmallSet<MCRegister, 4> Units = getRegUnits(MO.getReg(), TRI);
      for (MCRegister Reg : Units) {
        for (auto MIRegs : SeenDbgInstrs.lookup(Reg)) {
          auto &Regs = DbgValsToSinkMap[MIRegs.first];
          for (unsigned Reg : MIRegs.second)
            Regs.push_back(Reg);
        }
      }
    }
    SmallVector<MIRegs> DbgValsToSink(DbgValsToSinkMap.begin(),
                                      DbgValsToSinkMap.end());

    // Clear the kill flag if SrcReg is killed between MI and the end of the
    // block.
    clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
    MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
    performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
    updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);

    Changed = true;
    ++NumPostRACopySink;
  }
  return Changed;
}

bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  bool Changed = false;
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();

  ModifiedRegUnits.init(*TRI);
  UsedRegUnits.init(*TRI);
  for (auto &BB : MF)
    Changed |= tryToSinkCopy(BB, MF, TRI, TII);

  return Changed;
}
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 636c1d238932..a016b7085a00 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -1,1959 +1,1981 @@
//===- lib/MC/WasmObjectWriter.cpp - Wasm File Writer ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements Wasm object file writer information.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/BinaryFormat/WasmTraits.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWasmObjectWriter.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/StringSaver.h"
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "mc"

namespace {

// When we create the indirect function table we start at 1, so that there is
// an empty slot at 0 and therefore calling a null function pointer will trap.
static const uint32_t InitialTableOffset = 1;

// For patching purposes, we need to remember where each section starts, both
// for patching up the section size field, and for patching up references to
// locations within the section.
struct SectionBookkeeping {
  // Where the size of the section is written.
  uint64_t SizeOffset;
  // Where the section header ends (without custom section name).
  uint64_t PayloadOffset;
  // Where the contents of the section start.
  uint64_t ContentsOffset;
  uint32_t Index;
};

// A wasm data segment. A wasm binary contains only a single data section
// but that can contain many segments, each with their own virtual location
// in memory. Each MCSection data created by llvm is modeled as its own
// wasm data segment.
struct WasmDataSegment {
  MCSectionWasm *Section;
  StringRef Name;
  uint32_t InitFlags;
  uint64_t Offset;
  uint32_t Alignment;
  uint32_t LinkingFlags;
  SmallVector<char, 4> Data;
};

// A wasm function to be written into the function section.
struct WasmFunction {
  uint32_t SigIndex;
  const MCSymbolWasm *Sym;
};

// A wasm global to be written into the global section.
struct WasmGlobal {
  wasm::WasmGlobalType Type;
  uint64_t InitialValue;
};

// Information about a single item which is part of a COMDAT. For each data
// segment or function which is in the COMDAT, there is a corresponding
// WasmComdatEntry.
struct WasmComdatEntry {
  unsigned Kind;
  uint32_t Index;
};

// Information about a single relocation.
struct WasmRelocationEntry {
  uint64_t Offset;                   // Where is the relocation.
  const MCSymbolWasm *Symbol;        // The symbol to relocate with.
  int64_t Addend;                    // A value to add to the symbol.
  unsigned Type;                     // The type of the relocation.
  const MCSectionWasm *FixupSection; // The section the relocation is targeting.

  WasmRelocationEntry(uint64_t Offset, const MCSymbolWasm *Symbol,
                      int64_t Addend, unsigned Type,
                      const MCSectionWasm *FixupSection)
      : Offset(Offset), Symbol(Symbol), Addend(Addend), Type(Type),
        FixupSection(FixupSection) {}

  bool hasAddend() const { return wasm::relocTypeHasAddend(Type); }

  void print(raw_ostream &Out) const {
    Out << wasm::relocTypetoString(Type) << " Off=" << Offset
        << ", Sym=" << *Symbol << ", Addend=" << Addend
        << ", FixupSection=" << FixupSection->getName();
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
#endif
};

static const uint32_t InvalidIndex = -1;

struct WasmCustomSection {

  StringRef Name;
  MCSectionWasm *Section;

  uint32_t OutputContentsOffset;
  uint32_t OutputIndex;

  WasmCustomSection(StringRef Name, MCSectionWasm *Section)
      : Name(Name), Section(Section), OutputContentsOffset(0),
        OutputIndex(InvalidIndex) {}
};

#if !defined(NDEBUG)
raw_ostream &operator<<(raw_ostream &OS, const WasmRelocationEntry &Rel) {
  Rel.print(OS);
  return OS;
}
#endif

-// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
+// Write Value as an (unsigned) LEB value at offset Offset in Stream, padded
 // to allow patching.
-template <int W>
-void writePatchableLEB(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {
+template <typename T, int W>
+void writePatchableULEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {
   uint8_t Buffer[W];
-  unsigned SizeLen = encodeULEB128(X, Buffer, W);
+  unsigned SizeLen = encodeULEB128(Value, Buffer, W);
   assert(SizeLen == W);
   Stream.pwrite((char *)Buffer, SizeLen, Offset);
 }

-// Write X as a signed LEB value at offset Offset in Stream, padded
+// Write Value as a signed LEB value at offset Offset in Stream, padded
 // to allow patching.
-template <int W>
-void writePatchableSLEB(raw_pwrite_stream &Stream, int64_t X, uint64_t Offset) {
+template <typename T, int W>
+void writePatchableSLEB(raw_pwrite_stream &Stream, T Value, uint64_t Offset) {
   uint8_t Buffer[W];
-  unsigned SizeLen = encodeSLEB128(X, Buffer, W);
+  unsigned SizeLen = encodeSLEB128(Value, Buffer, W);
   assert(SizeLen == W);
   Stream.pwrite((char *)Buffer, SizeLen, Offset);
 }

-// Write X as a plain integer value at offset Offset in Stream.
-static void patchI32(raw_pwrite_stream &Stream, uint32_t X, uint64_t Offset) {
+static void writePatchableU32(raw_pwrite_stream &Stream, uint32_t Value,
+                              uint64_t Offset) {
+  writePatchableULEB<uint32_t, 5>(Stream, Value, Offset);
+}
+
+static void writePatchableS32(raw_pwrite_stream &Stream, int32_t Value,
+                              uint64_t Offset) {
+  writePatchableSLEB<int32_t, 5>(Stream, Value, Offset);
+}
+
+static void writePatchableU64(raw_pwrite_stream &Stream, uint64_t Value,
+                              uint64_t Offset) {
+  writePatchableULEB<uint64_t, 10>(Stream, Value, Offset);
+}
+
+static void writePatchableS64(raw_pwrite_stream &Stream, int64_t Value,
+                              uint64_t Offset) {
+  writePatchableSLEB<int64_t, 10>(Stream, Value, Offset);
+}
+
+// Write Value as a plain integer value at offset Offset in Stream.
+static void patchI32(raw_pwrite_stream &Stream, uint32_t Value,
+                     uint64_t Offset) {
   uint8_t Buffer[4];
-  support::endian::write32le(Buffer, X);
+  support::endian::write32le(Buffer, Value);
   Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
 }

-static void patchI64(raw_pwrite_stream &Stream, uint64_t X, uint64_t Offset) {
+static void patchI64(raw_pwrite_stream &Stream, uint64_t Value,
+                     uint64_t Offset) {
   uint8_t Buffer[8];
-  support::endian::write64le(Buffer, X);
+  support::endian::write64le(Buffer, Value);
   Stream.pwrite((char *)Buffer, sizeof(Buffer), Offset);
 }

bool isDwoSection(const MCSection &Sec) {
  return Sec.getName().endswith(".dwo");
}

class WasmObjectWriter : public MCObjectWriter {
  support::endian::Writer *W;

  /// The target specific Wasm writer instance.
  std::unique_ptr<MCWasmObjectTargetWriter> TargetObjectWriter;

  // Relocations for fixing up references in the code section.
  std::vector<WasmRelocationEntry> CodeRelocations;

  // Relocations for fixing up references in the data section.
  std::vector<WasmRelocationEntry> DataRelocations;

  // Index values to use for fixing up call_indirect type indices.
  // Maps function symbols to the index of the type of the function.
  DenseMap<const MCSymbolWasm *, uint32_t> TypeIndices;
  // Maps function symbols to the table element index space. Used
  // for TABLE_INDEX relocation types (i.e. address taken functions).
  DenseMap<const MCSymbolWasm *, uint32_t> TableIndices;
  // Maps function/global/table symbols to the
  // function/global/table/tag/section index space.
  DenseMap<const MCSymbolWasm *, uint32_t> WasmIndices;
  DenseMap<const MCSymbolWasm *, uint32_t> GOTIndices;
  // Maps data symbols to the Wasm segment and offset/size with the segment.
  DenseMap<const MCSymbolWasm *, wasm::WasmDataReference> DataLocations;

  // Stores output data (index, relocations, content offset) for custom
  // section.
  std::vector<WasmCustomSection> CustomSections;
  std::unique_ptr<WasmCustomSection> ProducersSection;
  std::unique_ptr<WasmCustomSection> TargetFeaturesSection;
  // Relocations for fixing up references in the custom sections.
  DenseMap<const MCSectionWasm *, std::vector<WasmRelocationEntry>>
      CustomSectionsRelocations;

  // Map from section to defining function symbol.
  DenseMap<const MCSection *, const MCSymbol *> SectionFunctions;

  DenseMap<wasm::WasmSignature, uint32_t> SignatureIndices;
  SmallVector<wasm::WasmSignature, 4> Signatures;
  SmallVector<WasmDataSegment, 4> DataSegments;
  unsigned NumFunctionImports = 0;
  unsigned NumGlobalImports = 0;
  unsigned NumTableImports = 0;
  unsigned NumTagImports = 0;
  uint32_t SectionCount = 0;

  enum class DwoMode {
    AllSections,
    NonDwoOnly,
    DwoOnly,
  };
  bool IsSplitDwarf = false;
  raw_pwrite_stream *OS = nullptr;
  raw_pwrite_stream *DwoOS = nullptr;

  // TargetObjectWriter wrappers.
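  // [Editor's note] On the widths 5 and 10 in the writePatchable* helpers
  // above: LEB128 carries 7 payload bits per byte, so a 32-bit value needs at
  // most ceil(32/7) = 5 bytes and a 64-bit value at most ceil(64/7) = 10.
  // Padding every value to that maximum keeps the field a fixed width so it
  // can be patched in place later. For example, the value 3 padded to 5
  // ULEB128 bytes is encoded as 0x83 0x80 0x80 0x80 0x00.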
bool is64Bit() const { return TargetObjectWriter->is64Bit(); } bool isEmscripten() const { return TargetObjectWriter->isEmscripten(); } void startSection(SectionBookkeeping &Section, unsigned SectionId); void startCustomSection(SectionBookkeeping &Section, StringRef Name); void endSection(SectionBookkeeping &Section); public: WasmObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS_) : TargetObjectWriter(std::move(MOTW)), OS(&OS_) {} WasmObjectWriter(std::unique_ptr MOTW, raw_pwrite_stream &OS_, raw_pwrite_stream &DwoOS_) : TargetObjectWriter(std::move(MOTW)), IsSplitDwarf(true), OS(&OS_), DwoOS(&DwoOS_) {} private: void reset() override { CodeRelocations.clear(); DataRelocations.clear(); TypeIndices.clear(); WasmIndices.clear(); GOTIndices.clear(); TableIndices.clear(); DataLocations.clear(); CustomSections.clear(); ProducersSection.reset(); TargetFeaturesSection.reset(); CustomSectionsRelocations.clear(); SignatureIndices.clear(); Signatures.clear(); DataSegments.clear(); SectionFunctions.clear(); NumFunctionImports = 0; NumGlobalImports = 0; NumTableImports = 0; MCObjectWriter::reset(); } void writeHeader(const MCAssembler &Asm); void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) override; void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; void prepareImports(SmallVectorImpl &Imports, MCAssembler &Asm, const MCAsmLayout &Layout); uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override; uint64_t writeOneObject(MCAssembler &Asm, const MCAsmLayout &Layout, DwoMode Mode); void writeString(const StringRef Str) { encodeULEB128(Str.size(), W->OS); W->OS << Str; } void writeStringWithAlignment(const StringRef Str, unsigned Alignment); void writeI32(int32_t val) { char Buffer[4]; support::endian::write32le(Buffer, val); W->OS.write(Buffer, sizeof(Buffer)); } void writeI64(int64_t val) { char Buffer[8]; support::endian::write64le(Buffer, val); W->OS.write(Buffer, sizeof(Buffer)); } void writeValueType(wasm::ValType Ty) { W->OS << static_cast(Ty); } void writeTypeSection(ArrayRef Signatures); void writeImportSection(ArrayRef Imports, uint64_t DataSize, uint32_t NumElements); void writeFunctionSection(ArrayRef Functions); void writeExportSection(ArrayRef Exports); void writeElemSection(const MCSymbolWasm *IndirectFunctionTable, ArrayRef TableElems); void writeDataCountSection(); uint32_t writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, ArrayRef Functions); uint32_t writeDataSection(const MCAsmLayout &Layout); void writeTagSection(ArrayRef TagTypes); void writeGlobalSection(ArrayRef Globals); void writeTableSection(ArrayRef Tables); void writeRelocSection(uint32_t SectionIndex, StringRef Name, std::vector &Relocations); void writeLinkingMetaDataSection( ArrayRef SymbolInfos, ArrayRef> InitFuncs, const std::map> &Comdats); void writeCustomSection(WasmCustomSection &CustomSection, const MCAssembler &Asm, const MCAsmLayout &Layout); void writeCustomRelocSections(); uint64_t getProvisionalValue(const WasmRelocationEntry &RelEntry, const MCAsmLayout &Layout); void applyRelocations(ArrayRef Relocations, uint64_t ContentsOffset, const MCAsmLayout &Layout); uint32_t getRelocationIndexValue(const WasmRelocationEntry &RelEntry); uint32_t getFunctionType(const MCSymbolWasm &Symbol); uint32_t getTagType(const MCSymbolWasm &Symbol); void registerFunctionType(const MCSymbolWasm &Symbol); void registerTagType(const MCSymbolWasm &Symbol); 
}; } // end anonymous namespace // Write out a section header and a patchable section size field. void WasmObjectWriter::startSection(SectionBookkeeping &Section, unsigned SectionId) { LLVM_DEBUG(dbgs() << "startSection " << SectionId << "\n"); W->OS << char(SectionId); Section.SizeOffset = W->OS.tell(); // The section size. We don't know the size yet, so reserve enough space // for any 32-bit value; we'll patch it later. encodeULEB128(0, W->OS, 5); // The position where the section starts, for measuring its size. Section.ContentsOffset = W->OS.tell(); Section.PayloadOffset = W->OS.tell(); Section.Index = SectionCount++; } // Write a string with extra paddings for trailing alignment // TODO: support alignment at asm and llvm level? void WasmObjectWriter::writeStringWithAlignment(const StringRef Str, unsigned Alignment) { // Calculate the encoded size of str length and add pads based on it and // alignment. raw_null_ostream NullOS; uint64_t StrSizeLength = encodeULEB128(Str.size(), NullOS); uint64_t Offset = W->OS.tell() + StrSizeLength + Str.size(); uint64_t Paddings = offsetToAlignment(Offset, Align(Alignment)); Offset += Paddings; // LEB128 greater than 5 bytes is invalid assert((StrSizeLength + Paddings) <= 5 && "too long string to align"); encodeSLEB128(Str.size(), W->OS, StrSizeLength + Paddings); W->OS << Str; assert(W->OS.tell() == Offset && "invalid padding"); } void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section, StringRef Name) { LLVM_DEBUG(dbgs() << "startCustomSection " << Name << "\n"); startSection(Section, wasm::WASM_SEC_CUSTOM); // The position where the section header ends, for measuring its size. Section.PayloadOffset = W->OS.tell(); // Custom sections in wasm also have a string identifier. if (Name != "__clangast") { writeString(Name); } else { // The on-disk hashtable in clangast needs to be aligned by 4 bytes. writeStringWithAlignment(Name, 4); } // The position where the custom section starts. Section.ContentsOffset = W->OS.tell(); } // Now that the section is complete and we know how big it is, patch up the // section size field at the start of the section. void WasmObjectWriter::endSection(SectionBookkeeping &Section) { uint64_t Size = W->OS.tell(); // /dev/null doesn't support seek/tell and can report offset of 0. // Simply skip this patching in that case. if (!Size) return; Size -= Section.PayloadOffset; if (uint32_t(Size) != Size) report_fatal_error("section size does not fit in a uint32_t"); LLVM_DEBUG(dbgs() << "endSection size=" << Size << "\n"); // Write the final section size to the payload_len field, which follows // the section id byte. - writePatchableLEB<5>(static_cast(W->OS), Size, - Section.SizeOffset); + writePatchableU32(static_cast(W->OS), Size, + Section.SizeOffset); } // Emit the Wasm header. void WasmObjectWriter::writeHeader(const MCAssembler &Asm) { W->OS.write(wasm::WasmMagic, sizeof(wasm::WasmMagic)); W->write(wasm::WasmVersion); } void WasmObjectWriter::executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) { // Some compilation units require the indirect function table to be present // but don't explicitly reference it. This is the case for call_indirect // without the reference-types feature, and also function bitcasts in all // cases. In those cases the __indirect_function_table has the // WASM_SYMBOL_NO_STRIP attribute. Here we make sure this symbol makes it to // the assembler, if needed. 
  if (auto *Sym = Asm.getContext().lookupSymbol("__indirect_function_table")) {
    const auto *WasmSym = static_cast<const MCSymbolWasm *>(Sym);
    if (WasmSym->isNoStrip())
      Asm.registerSymbol(*Sym);
  }

  // Build a map of sections to the function that defines them, for use
  // in recordRelocation.
  for (const MCSymbol &S : Asm.symbols()) {
    const auto &WS = static_cast<const MCSymbolWasm &>(S);
    if (WS.isDefined() && WS.isFunction() && !WS.isVariable()) {
      const auto &Sec = static_cast<const MCSectionWasm &>(S.getSection());
      auto Pair = SectionFunctions.insert(std::make_pair(&Sec, &S));
      if (!Pair.second)
        report_fatal_error("section already has a defining function: " +
                           Sec.getName());
    }
  }
}

void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
                                        const MCAsmLayout &Layout,
                                        const MCFragment *Fragment,
                                        const MCFixup &Fixup, MCValue Target,
                                        uint64_t &FixedValue) {
  // The WebAssembly backend should never generate FKF_IsPCRel fixups.
  assert(!(Asm.getBackend().getFixupKindInfo(Fixup.getKind()).Flags &
           MCFixupKindInfo::FKF_IsPCRel));

  const auto &FixupSection = cast<MCSectionWasm>(*Fragment->getParent());
  uint64_t C = Target.getConstant();
  uint64_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
  MCContext &Ctx = Asm.getContext();
  bool IsLocRel = false;

  if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
    const auto &SymB = cast<MCSymbolWasm>(RefB->getSymbol());

    if (FixupSection.getKind().isText()) {
      Ctx.reportError(Fixup.getLoc(),
                      Twine("symbol '") + SymB.getName() +
                          "' unsupported subtraction expression used in "
                          "relocation in code section.");
      return;
    }

    if (SymB.isUndefined()) {
      Ctx.reportError(Fixup.getLoc(),
                      Twine("symbol '") + SymB.getName() +
                          "' can not be undefined in a subtraction expression");
      return;
    }
    const MCSection &SecB = SymB.getSection();
    if (&SecB != &FixupSection) {
      Ctx.reportError(Fixup.getLoc(),
                      Twine("symbol '") + SymB.getName() +
                          "' can not be placed in a different section");
      return;
    }
    IsLocRel = true;
    C += FixupOffset - Layout.getSymbolOffset(SymB);
  }

  // We either rejected the fixup or folded B into C at this point.
  const MCSymbolRefExpr *RefA = Target.getSymA();
  const auto *SymA = cast<MCSymbolWasm>(&RefA->getSymbol());

  // The .init_array isn't translated as data, so don't do relocations in it.
  if (FixupSection.getName().startswith(".init_array")) {
    SymA->setUsedInInitArray();
    return;
  }

  if (SymA->isVariable()) {
    const MCExpr *Expr = SymA->getVariableValue();
    if (const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr))
      if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF)
        llvm_unreachable("weakref used in reloc not yet implemented");
  }

  // Put any constant offset in an addend. Offsets can be negative, and
  // LLVM expects wrapping, in contrast to wasm's immediates which can't
  // be negative and don't wrap.
  FixedValue = 0;

  unsigned Type =
      TargetObjectWriter->getRelocType(Target, Fixup, FixupSection, IsLocRel);

  // Absolute offset within a section or a function.
  // Currently only supported for metadata sections.
  // See: test/MC/WebAssembly/blockaddress.ll
  if ((Type == wasm::R_WASM_FUNCTION_OFFSET_I32 ||
       Type == wasm::R_WASM_FUNCTION_OFFSET_I64 ||
       Type == wasm::R_WASM_SECTION_OFFSET_I32) &&
      SymA->isDefined()) {
    // SymA can be a temp data symbol that represents a function (in which case
    // it needs to be replaced by the section symbol), [XXX and it apparently
    // later gets changed again to a func symbol?] or it can be a real
    // function symbol, in which case it can be left as-is.
    if (!FixupSection.getKind().isMetadata())
      report_fatal_error("relocations for function or section offsets are "
                         "only supported in metadata sections");

    const MCSymbol *SectionSymbol = nullptr;
    const MCSection &SecA = SymA->getSection();
    if (SecA.getKind().isText()) {
      auto SecSymIt = SectionFunctions.find(&SecA);
      if (SecSymIt == SectionFunctions.end())
        report_fatal_error("section doesn\'t have defining symbol");
      SectionSymbol = SecSymIt->second;
    } else {
      SectionSymbol = SecA.getBeginSymbol();
    }
    if (!SectionSymbol)
      report_fatal_error("section symbol is required for relocation");

    C += Layout.getSymbolOffset(*SymA);
    SymA = cast<MCSymbolWasm>(SectionSymbol);
  }

  if (Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB ||
      Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB64 ||
      Type == wasm::R_WASM_TABLE_INDEX_SLEB ||
      Type == wasm::R_WASM_TABLE_INDEX_SLEB64 ||
      Type == wasm::R_WASM_TABLE_INDEX_I32 ||
      Type == wasm::R_WASM_TABLE_INDEX_I64) {
    // TABLE_INDEX relocs implicitly use the default indirect function table.
    // We require the function table to have already been defined.
    auto TableName = "__indirect_function_table";
    MCSymbolWasm *Sym = cast_or_null<MCSymbolWasm>(Ctx.lookupSymbol(TableName));
    if (!Sym) {
      report_fatal_error("missing indirect function table symbol");
    } else {
      if (!Sym->isFunctionTable())
        report_fatal_error("__indirect_function_table symbol has wrong type");
      // Ensure that __indirect_function_table reaches the output.
      Sym->setNoStrip();
      Asm.registerSymbol(*Sym);
    }
  }

  // Relocations other than R_WASM_TYPE_INDEX_LEB are required to be
  // against a named symbol.
  if (Type != wasm::R_WASM_TYPE_INDEX_LEB) {
    if (SymA->getName().empty())
      report_fatal_error("relocations against un-named temporaries are not yet "
                         "supported by wasm");

    SymA->setUsedInReloc();
  }

  switch (RefA->getKind()) {
  case MCSymbolRefExpr::VK_GOT:
  case MCSymbolRefExpr::VK_WASM_GOT_TLS:
    SymA->setUsedInGOT();
    break;
  default:
    break;
  }

  WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection);
  LLVM_DEBUG(dbgs() << "WasmReloc: " << Rec << "\n");

  if (FixupSection.isWasmData()) {
    DataRelocations.push_back(Rec);
  } else if (FixupSection.getKind().isText()) {
    CodeRelocations.push_back(Rec);
  } else if (FixupSection.getKind().isMetadata()) {
    CustomSectionsRelocations[&FixupSection].push_back(Rec);
  } else {
    llvm_unreachable("unexpected section type");
  }
}

// Compute a value to write into the code at the location covered
// by RelEntry. This value isn't used by the static linker; it just serves
// to make the object format more readable and more likely to be directly
// usable.
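// [Editor's note] Worked example with assumed numbers: for a function at
// table slot 5 with InitialTableOffset == 1, a R_WASM_TABLE_INDEX_SLEB reloc
// below gets provisional value 5, while the _REL_ variants get 5 - 1 = 4,
// i.e. an index relative to the table's first real entry.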
uint64_t WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry, const MCAsmLayout &Layout) { if ((RelEntry.Type == wasm::R_WASM_GLOBAL_INDEX_LEB || RelEntry.Type == wasm::R_WASM_GLOBAL_INDEX_I32) && !RelEntry.Symbol->isGlobal()) { assert(GOTIndices.count(RelEntry.Symbol) > 0 && "symbol not found in GOT index space"); return GOTIndices[RelEntry.Symbol]; } switch (RelEntry.Type) { case wasm::R_WASM_TABLE_INDEX_REL_SLEB: case wasm::R_WASM_TABLE_INDEX_REL_SLEB64: case wasm::R_WASM_TABLE_INDEX_SLEB: case wasm::R_WASM_TABLE_INDEX_SLEB64: case wasm::R_WASM_TABLE_INDEX_I32: case wasm::R_WASM_TABLE_INDEX_I64: { // Provisional value is table address of the resolved symbol itself const MCSymbolWasm *Base = cast(Layout.getBaseSymbol(*RelEntry.Symbol)); assert(Base->isFunction()); if (RelEntry.Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB || RelEntry.Type == wasm::R_WASM_TABLE_INDEX_REL_SLEB64) return TableIndices[Base] - InitialTableOffset; else return TableIndices[Base]; } case wasm::R_WASM_TYPE_INDEX_LEB: // Provisional value is same as the index return getRelocationIndexValue(RelEntry); case wasm::R_WASM_FUNCTION_INDEX_LEB: case wasm::R_WASM_GLOBAL_INDEX_LEB: case wasm::R_WASM_GLOBAL_INDEX_I32: case wasm::R_WASM_TAG_INDEX_LEB: case wasm::R_WASM_TABLE_NUMBER_LEB: // Provisional value is function/global/tag Wasm index assert(WasmIndices.count(RelEntry.Symbol) > 0 && "symbol not found in wasm index space"); return WasmIndices[RelEntry.Symbol]; case wasm::R_WASM_FUNCTION_OFFSET_I32: case wasm::R_WASM_FUNCTION_OFFSET_I64: case wasm::R_WASM_SECTION_OFFSET_I32: { if (!RelEntry.Symbol->isDefined()) return 0; const auto &Section = static_cast(RelEntry.Symbol->getSection()); return Section.getSectionOffset() + RelEntry.Addend; } case wasm::R_WASM_MEMORY_ADDR_LEB: case wasm::R_WASM_MEMORY_ADDR_LEB64: case wasm::R_WASM_MEMORY_ADDR_SLEB: case wasm::R_WASM_MEMORY_ADDR_SLEB64: case wasm::R_WASM_MEMORY_ADDR_REL_SLEB: case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64: case wasm::R_WASM_MEMORY_ADDR_I32: case wasm::R_WASM_MEMORY_ADDR_I64: case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB: case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB64: case wasm::R_WASM_MEMORY_ADDR_LOCREL_I32: { // Provisional value is address of the global plus the offset // For undefined symbols, use zero if (!RelEntry.Symbol->isDefined()) return 0; const wasm::WasmDataReference &SymRef = DataLocations[RelEntry.Symbol]; const WasmDataSegment &Segment = DataSegments[SymRef.Segment]; // Ignore overflow. LLVM allows address arithmetic to silently wrap. return Segment.Offset + SymRef.Offset + RelEntry.Addend; } default: llvm_unreachable("invalid relocation type"); } } static void addData(SmallVectorImpl &DataBytes, MCSectionWasm &DataSection) { LLVM_DEBUG(errs() << "addData: " << DataSection.getName() << "\n"); DataBytes.resize(alignTo(DataBytes.size(), DataSection.getAlignment())); for (const MCFragment &Frag : DataSection) { if (Frag.hasInstructions()) report_fatal_error("only data supported in data sections"); if (auto *Align = dyn_cast(&Frag)) { if (Align->getValueSize() != 1) report_fatal_error("only byte values supported for alignment"); // If nops are requested, use zeros, as this is the data section. uint8_t Value = Align->hasEmitNops() ? 
0 : Align->getValue(); uint64_t Size = std::min(alignTo(DataBytes.size(), Align->getAlignment()), DataBytes.size() + Align->getMaxBytesToEmit()); DataBytes.resize(Size, Value); } else if (auto *Fill = dyn_cast(&Frag)) { int64_t NumValues; if (!Fill->getNumValues().evaluateAsAbsolute(NumValues)) llvm_unreachable("The fill should be an assembler constant"); DataBytes.insert(DataBytes.end(), Fill->getValueSize() * NumValues, Fill->getValue()); } else if (auto *LEB = dyn_cast(&Frag)) { const SmallVectorImpl &Contents = LEB->getContents(); llvm::append_range(DataBytes, Contents); } else { const auto &DataFrag = cast(Frag); const SmallVectorImpl &Contents = DataFrag.getContents(); llvm::append_range(DataBytes, Contents); } } LLVM_DEBUG(dbgs() << "addData -> " << DataBytes.size() << "\n"); } uint32_t WasmObjectWriter::getRelocationIndexValue(const WasmRelocationEntry &RelEntry) { if (RelEntry.Type == wasm::R_WASM_TYPE_INDEX_LEB) { if (!TypeIndices.count(RelEntry.Symbol)) report_fatal_error("symbol not found in type index space: " + RelEntry.Symbol->getName()); return TypeIndices[RelEntry.Symbol]; } return RelEntry.Symbol->getIndex(); } // Apply the portions of the relocation records that we can handle ourselves // directly. void WasmObjectWriter::applyRelocations( ArrayRef Relocations, uint64_t ContentsOffset, const MCAsmLayout &Layout) { auto &Stream = static_cast(W->OS); for (const WasmRelocationEntry &RelEntry : Relocations) { uint64_t Offset = ContentsOffset + RelEntry.FixupSection->getSectionOffset() + RelEntry.Offset; LLVM_DEBUG(dbgs() << "applyRelocation: " << RelEntry << "\n"); - auto Value = getProvisionalValue(RelEntry, Layout); + uint64_t Value = getProvisionalValue(RelEntry, Layout); switch (RelEntry.Type) { case wasm::R_WASM_FUNCTION_INDEX_LEB: case wasm::R_WASM_TYPE_INDEX_LEB: case wasm::R_WASM_GLOBAL_INDEX_LEB: case wasm::R_WASM_MEMORY_ADDR_LEB: case wasm::R_WASM_TAG_INDEX_LEB: case wasm::R_WASM_TABLE_NUMBER_LEB: - writePatchableLEB<5>(Stream, Value, Offset); + writePatchableU32(Stream, Value, Offset); break; case wasm::R_WASM_MEMORY_ADDR_LEB64: - writePatchableLEB<10>(Stream, Value, Offset); + writePatchableU64(Stream, Value, Offset); break; case wasm::R_WASM_TABLE_INDEX_I32: case wasm::R_WASM_MEMORY_ADDR_I32: case wasm::R_WASM_FUNCTION_OFFSET_I32: case wasm::R_WASM_SECTION_OFFSET_I32: case wasm::R_WASM_GLOBAL_INDEX_I32: case wasm::R_WASM_MEMORY_ADDR_LOCREL_I32: patchI32(Stream, Value, Offset); break; case wasm::R_WASM_TABLE_INDEX_I64: case wasm::R_WASM_MEMORY_ADDR_I64: case wasm::R_WASM_FUNCTION_OFFSET_I64: patchI64(Stream, Value, Offset); break; case wasm::R_WASM_TABLE_INDEX_SLEB: case wasm::R_WASM_TABLE_INDEX_REL_SLEB: case wasm::R_WASM_MEMORY_ADDR_SLEB: case wasm::R_WASM_MEMORY_ADDR_REL_SLEB: case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB: - writePatchableSLEB<5>(Stream, Value, Offset); + writePatchableS32(Stream, Value, Offset); break; case wasm::R_WASM_TABLE_INDEX_SLEB64: case wasm::R_WASM_TABLE_INDEX_REL_SLEB64: case wasm::R_WASM_MEMORY_ADDR_SLEB64: case wasm::R_WASM_MEMORY_ADDR_REL_SLEB64: case wasm::R_WASM_MEMORY_ADDR_TLS_SLEB64: - writePatchableSLEB<10>(Stream, Value, Offset); + w