diff --git a/contrib/llvm-project/clang/include/clang/AST/ExternalASTSource.h b/contrib/llvm-project/clang/include/clang/AST/ExternalASTSource.h index 385c32edbae0..582ed7c65f58 100644 --- a/contrib/llvm-project/clang/include/clang/AST/ExternalASTSource.h +++ b/contrib/llvm-project/clang/include/clang/AST/ExternalASTSource.h @@ -1,596 +1,618 @@ //===- ExternalASTSource.h - Abstract External AST Interface ----*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines the ExternalASTSource interface, which enables // construction of AST nodes from some external source. // //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_AST_EXTERNALASTSOURCE_H #define LLVM_CLANG_AST_EXTERNALASTSOURCE_H #include "clang/AST/CharUnits.h" #include "clang/AST/DeclBase.h" #include "clang/Basic/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/Support/PointerLikeTypeTraits.h" +#include #include #include #include #include +#include #include #include namespace clang { class ASTConsumer; class ASTContext; class ASTSourceDescriptor; class CXXBaseSpecifier; class CXXCtorInitializer; class CXXRecordDecl; class DeclarationName; class FieldDecl; class IdentifierInfo; class NamedDecl; class ObjCInterfaceDecl; class RecordDecl; class Selector; class Stmt; class TagDecl; /// Abstract interface for external sources of AST nodes. /// /// External AST sources provide AST nodes constructed from some /// external source, such as a precompiled header. External AST /// sources can resolve types and declarations from abstract IDs into /// actual type and declaration nodes, and read parts of declaration /// contexts. class ExternalASTSource : public RefCountedBase { friend class ExternalSemaSource; /// Generation number for this external AST source. Must be increased /// whenever we might have added new redeclarations for existing decls. uint32_t CurrentGeneration = 0; /// LLVM-style RTTI. static char ID; public: ExternalASTSource() = default; virtual ~ExternalASTSource(); /// RAII class for safely pairing a StartedDeserializing call /// with FinishedDeserializing. class Deserializing { ExternalASTSource *Source; public: explicit Deserializing(ExternalASTSource *source) : Source(source) { assert(Source); Source->StartedDeserializing(); } ~Deserializing() { Source->FinishedDeserializing(); } }; /// Get the current generation of this AST source. This number /// is incremented each time the AST source lazily extends an existing /// entity. uint32_t getGeneration() const { return CurrentGeneration; } /// Resolve a declaration ID into a declaration, potentially /// building a new declaration. /// /// This method only needs to be implemented if the AST source ever /// passes back decl sets as VisibleDeclaration objects. /// /// The default implementation of this method is a no-op. virtual Decl *GetExternalDecl(GlobalDeclID ID); /// Resolve a selector ID into a selector. /// /// This operation only needs to be implemented if the AST source /// returns non-zero for GetNumKnownSelectors(). 
/// /// The default implementation of this method is a no-op. virtual Selector GetExternalSelector(uint32_t ID); /// Returns the number of selectors known to the external AST /// source. /// /// The default implementation of this method is a no-op. virtual uint32_t GetNumExternalSelectors(); /// Resolve the offset of a statement in the decl stream into /// a statement. /// /// This operation is meant to be used via a LazyOffsetPtr. It only /// needs to be implemented if the AST source uses methods like /// FunctionDecl::setLazyBody when building decls. /// /// The default implementation of this method is a no-op. virtual Stmt *GetExternalDeclStmt(uint64_t Offset); /// Resolve the offset of a set of C++ constructor initializers in /// the decl stream into an array of initializers. /// /// The default implementation of this method is a no-op. virtual CXXCtorInitializer **GetExternalCXXCtorInitializers(uint64_t Offset); /// Resolve the offset of a set of C++ base specifiers in the decl /// stream into an array of specifiers. /// /// The default implementation of this method is a no-op. virtual CXXBaseSpecifier *GetExternalCXXBaseSpecifiers(uint64_t Offset); /// Update an out-of-date identifier. virtual void updateOutOfDateIdentifier(const IdentifierInfo &II) {} /// Find all declarations with the given name in the given context, /// and add them to the context by calling SetExternalVisibleDeclsForName /// or SetNoExternalVisibleDeclsForName. /// \return \c true if any declarations might have been found, \c false if /// we definitely have no declarations with tbis name. /// /// The default implementation of this method is a no-op returning \c false. virtual bool FindExternalVisibleDeclsByName(const DeclContext *DC, DeclarationName Name); /// Ensures that the table of all visible declarations inside this /// context is up to date. /// /// The default implementation of this function is a no-op. virtual void completeVisibleDeclsMap(const DeclContext *DC); /// Retrieve the module that corresponds to the given module ID. virtual Module *getModule(unsigned ID) { return nullptr; } /// Return a descriptor for the corresponding module, if one exists. virtual std::optional getSourceDescriptor(unsigned ID); enum ExtKind { EK_Always, EK_Never, EK_ReplyHazy }; virtual ExtKind hasExternalDefinitions(const Decl *D); /// Finds all declarations lexically contained within the given /// DeclContext, after applying an optional filter predicate. /// /// \param IsKindWeWant a predicate function that returns true if the passed /// declaration kind is one we are looking for. /// /// The default implementation of this method is a no-op. virtual void FindExternalLexicalDecls(const DeclContext *DC, llvm::function_ref IsKindWeWant, SmallVectorImpl &Result); /// Finds all declarations lexically contained within the given /// DeclContext. void FindExternalLexicalDecls(const DeclContext *DC, SmallVectorImpl &Result) { FindExternalLexicalDecls(DC, [](Decl::Kind) { return true; }, Result); } /// Get the decls that are contained in a file in the Offset/Length /// range. \p Length can be 0 to indicate a point at \p Offset instead of /// a range. virtual void FindFileRegionDecls(FileID File, unsigned Offset, unsigned Length, SmallVectorImpl &Decls); /// Gives the external AST source an opportunity to complete /// the redeclaration chain for a declaration. Called each time we /// need the most recent declaration of a declaration after the /// generation count is incremented. 
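As a point of reference for the name-lookup hooks declared above, here is a minimal sketch (not part of this patch) of a client-side subclass. The class name and the external store it consults are hypothetical; the overridden virtual and the protected Set*VisibleDeclsForName helpers are the API declared in this header.

#include "clang/AST/ExternalASTSource.h"
#include "llvm/ADT/SmallVector.h"

class MyASTSource : public clang::ExternalASTSource {
public:
  bool FindExternalVisibleDeclsByName(const clang::DeclContext *DC,
                                      clang::DeclarationName Name) override {
    llvm::SmallVector<clang::NamedDecl *, 4> Found;
    // ... consult the external store and materialize NamedDecls into Found ...
    if (Found.empty()) {
      // Cache the negative result so clang does not ask again for this name.
      SetNoExternalVisibleDeclsForName(DC, Name);
      return false;
    }
    SetExternalVisibleDeclsForName(DC, Name, Found);
    return true; // declarations might have been found
  }
};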
virtual void CompleteRedeclChain(const Decl *D); /// Gives the external AST source an opportunity to complete /// an incomplete type. virtual void CompleteType(TagDecl *Tag); /// Gives the external AST source an opportunity to complete an /// incomplete Objective-C class. /// /// This routine will only be invoked if the "externally completed" bit is /// set on the ObjCInterfaceDecl via the function /// \c ObjCInterfaceDecl::setExternallyCompleted(). virtual void CompleteType(ObjCInterfaceDecl *Class); /// Loads comment ranges. virtual void ReadComments(); /// Notify ExternalASTSource that we started deserialization of /// a decl or type so until FinishedDeserializing is called there may be /// decls that are initializing. Must be paired with FinishedDeserializing. /// /// The default implementation of this method is a no-op. virtual void StartedDeserializing(); /// Notify ExternalASTSource that we finished the deserialization of /// a decl or type. Must be paired with StartedDeserializing. /// /// The default implementation of this method is a no-op. virtual void FinishedDeserializing(); /// Function that will be invoked when we begin parsing a new /// translation unit involving this external AST source. /// /// The default implementation of this method is a no-op. virtual void StartTranslationUnit(ASTConsumer *Consumer); /// Print any statistics that have been gathered regarding /// the external AST source. /// /// The default implementation of this method is a no-op. virtual void PrintStats(); /// Perform layout on the given record. /// /// This routine allows the external AST source to provide an specific /// layout for a record, overriding the layout that would normally be /// constructed. It is intended for clients who receive specific layout /// details rather than source code (such as LLDB). The client is expected /// to fill in the field offsets, base offsets, virtual base offsets, and /// complete object size. /// /// \param Record The record whose layout is being requested. /// /// \param Size The final size of the record, in bits. /// /// \param Alignment The final alignment of the record, in bits. /// /// \param FieldOffsets The offset of each of the fields within the record, /// expressed in bits. All of the fields must be provided with offsets. /// /// \param BaseOffsets The offset of each of the direct, non-virtual base /// classes. If any bases are not given offsets, the bases will be laid /// out according to the ABI. /// /// \param VirtualBaseOffsets The offset of each of the virtual base classes /// (either direct or not). If any bases are not given offsets, the bases will be laid /// out according to the ABI. /// /// \returns true if the record layout was provided, false otherwise. virtual bool layoutRecordType( const RecordDecl *Record, uint64_t &Size, uint64_t &Alignment, llvm::DenseMap &FieldOffsets, llvm::DenseMap &BaseOffsets, llvm::DenseMap &VirtualBaseOffsets); //===--------------------------------------------------------------------===// // Queries for performance analysis. //===--------------------------------------------------------------------===// struct MemoryBufferSizes { size_t malloc_bytes; size_t mmap_bytes; MemoryBufferSizes(size_t malloc_bytes, size_t mmap_bytes) : malloc_bytes(malloc_bytes), mmap_bytes(mmap_bytes) {} }; /// Return the amount of memory used by memory buffers, breaking down /// by heap-backed versus mmap'ed memory. 
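The layoutRecordType hook documented above is the one a debugger-style client (the comment mentions LLDB) would override when it already knows object layouts. A hedged sketch follows: the subclass name, the fixed two-field layout, and the assumption that every field is 32 bits wide are illustrative only, and the exact DenseMap parameter types are taken from the upstream declaration of this hook.

#include "clang/AST/CharUnits.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExternalASTSource.h"
#include "llvm/ADT/DenseMap.h"

class DebuggerLayoutSource : public clang::ExternalASTSource {
public:
  bool layoutRecordType(
      const clang::RecordDecl *Record, uint64_t &Size, uint64_t &Alignment,
      llvm::DenseMap<const clang::FieldDecl *, uint64_t> &FieldOffsets,
      llvm::DenseMap<const clang::CXXRecordDecl *, clang::CharUnits>
          &BaseOffsets,
      llvm::DenseMap<const clang::CXXRecordDecl *, clang::CharUnits>
          &VirtualBaseOffsets) override {
    // Pretend the debug info says: two 32-bit members, 32-bit alignment.
    // Sizes and offsets are expressed in bits, as documented above.
    Size = 64;
    Alignment = 32;
    uint64_t Bit = 0;
    for (const clang::FieldDecl *FD : Record->fields()) {
      FieldOffsets[FD] = Bit;
      Bit += 32;
    }
    return true; // layout provided; clang will not compute its own
  }
};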
  MemoryBufferSizes getMemoryBufferSizes() const {
    MemoryBufferSizes sizes(0, 0);
    getMemoryBufferSizes(sizes);
    return sizes;
  }

  virtual void getMemoryBufferSizes(MemoryBufferSizes &sizes) const;

  /// LLVM-style RTTI.
  /// \{
  virtual bool isA(const void *ClassID) const { return ClassID == &ID; }
  static bool classof(const ExternalASTSource *S) { return S->isA(&ID); }
  /// \}

protected:
  static DeclContextLookupResult
  SetExternalVisibleDeclsForName(const DeclContext *DC, DeclarationName Name,
                                 ArrayRef<NamedDecl *> Decls);

  static DeclContextLookupResult
  SetNoExternalVisibleDeclsForName(const DeclContext *DC, DeclarationName Name);

  /// Increment the current generation.
  uint32_t incrementGeneration(ASTContext &C);
};

/// A lazy pointer to an AST node (of base type T) that resides
/// within an external AST source.
///
/// The AST node is identified within the external AST source by a
/// 63-bit offset, and can be retrieved via an operation on the
/// external AST source itself.
template <typename T, typename OffsT, T *(ExternalASTSource::*Get)(OffsT Offset)>
struct LazyOffsetPtr {
  /// Either a pointer to an AST node or the offset within the
  /// external AST source where the AST node can be found.
  ///
  /// If the low bit is clear, a pointer to the AST node. If the low
  /// bit is set, the upper 63 bits are the offset.
-  mutable uint64_t Ptr = 0;
+  static constexpr size_t DataSize = std::max(sizeof(uint64_t), sizeof(T *));
+  alignas(uint64_t) alignas(T *) mutable unsigned char Data[DataSize] = {};
+
+  unsigned char GetLSB() const {
+    return Data[llvm::sys::IsBigEndianHost ? DataSize - 1 : 0];
+  }
+
+  template <typename U> U &As(bool New) const {
+    unsigned char *Obj =
+        Data + (llvm::sys::IsBigEndianHost ? DataSize - sizeof(U) : 0);
+    if (New)
+      return *new (Obj) U;
+    return *std::launder(reinterpret_cast<U *>(Obj));
+  }
+
+  T *&GetPtr() const { return As<T *>(false); }
+  uint64_t &GetU64() const { return As<uint64_t>(false); }
+  void SetPtr(T *Ptr) const { As<T *>(true) = Ptr; }
+  void SetU64(uint64_t U64) const { As<uint64_t>(true) = U64; }

public:
  LazyOffsetPtr() = default;

-  explicit LazyOffsetPtr(T *Ptr) : Ptr(reinterpret_cast<uint64_t>(Ptr)) {}
+  explicit LazyOffsetPtr(T *Ptr) : Data() { SetPtr(Ptr); }

-  explicit LazyOffsetPtr(uint64_t Offset) : Ptr((Offset << 1) | 0x01) {
+  explicit LazyOffsetPtr(uint64_t Offset) : Data() {
    assert((Offset << 1 >> 1) == Offset && "Offsets must require < 63 bits");
    if (Offset == 0)
-      Ptr = 0;
+      SetPtr(nullptr);
+    else
+      SetU64((Offset << 1) | 0x01);
  }

  LazyOffsetPtr &operator=(T *Ptr) {
-    this->Ptr = reinterpret_cast<uint64_t>(Ptr);
+    SetPtr(Ptr);
    return *this;
  }

  LazyOffsetPtr &operator=(uint64_t Offset) {
    assert((Offset << 1 >> 1) == Offset && "Offsets must require < 63 bits");
    if (Offset == 0)
-      Ptr = 0;
+      SetPtr(nullptr);
    else
-      Ptr = (Offset << 1) | 0x01;
+      SetU64((Offset << 1) | 0x01);
    return *this;
  }

  /// Whether this pointer is non-NULL.
  ///
  /// This operation does not require the AST node to be deserialized.
-  explicit operator bool() const { return Ptr != 0; }
+  explicit operator bool() const { return isOffset() || GetPtr() != nullptr; }

  /// Whether this pointer is non-NULL.
  ///
  /// This operation does not require the AST node to be deserialized.
-  bool isValid() const { return Ptr != 0; }
+  bool isValid() const { return isOffset() || GetPtr() != nullptr; }

  /// Whether this pointer is currently stored as an offset.
-  bool isOffset() const { return Ptr & 0x01; }
+  bool isOffset() const { return GetLSB() & 0x01; }

  /// Retrieve the pointer to the AST node that this lazy pointer points to.
  ///
  /// \param Source the external AST source.
  ///
  /// \returns a pointer to the AST node.
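To show why the new Data buffer and the GetLSB/As helpers above are endian-aware, here is a standalone sketch of the same technique in plain C++. It is not taken from the patch: it uses memcpy and a runtime endianness probe instead of placement new, std::launder, and llvm::sys::IsBigEndianHost, but it places the value at the same end of the buffer so the tag bit is always in a known byte.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>

// True on big-endian hosts: the first byte of a 16-bit 1 is then zero.
static bool isBigEndianHost() {
  const std::uint16_t Probe = 1;
  unsigned char First;
  std::memcpy(&First, &Probe, 1);
  return First == 0;
}

struct LazySlot {
  // Big enough and aligned enough for either representation.
  static constexpr std::size_t Size =
      sizeof(std::uint64_t) > sizeof(void *) ? sizeof(std::uint64_t)
                                             : sizeof(void *);
  alignas(std::uint64_t) alignas(void *) unsigned char Data[Size] = {};

  // The value sits at the end of the buffer on big-endian hosts and at the
  // start on little-endian hosts, so its least significant byte is always at
  // a known index.
  unsigned char lsb() const { return Data[isBigEndianHost() ? Size - 1 : 0]; }

  void setOffset(std::uint64_t Offset) { // low bit set => offset
    std::uint64_t Tagged = (Offset << 1) | 1;
    std::memcpy(Data + (isBigEndianHost() ? Size - sizeof(Tagged) : 0),
                &Tagged, sizeof(Tagged));
  }

  void setPointer(void *P) { // low bit clear => pointer
    std::memcpy(Data + (isBigEndianHost() ? Size - sizeof(P) : 0), &P,
                sizeof(P));
  }

  bool isOffset() const { return lsb() & 1; }
};

int main() {
  LazySlot S;
  S.setOffset(42);
  std::cout << S.isOffset() << '\n'; // 1: stored as a tagged offset
  int X = 0;
  S.setPointer(&X);
  std::cout << S.isOffset() << '\n'; // 0: aligned pointers have a clear low bit
}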
T *get(ExternalASTSource *Source) const { if (isOffset()) { assert(Source && "Cannot deserialize a lazy pointer without an AST source"); - Ptr = reinterpret_cast((Source->*Get)(OffsT(Ptr >> 1))); + SetPtr((Source->*Get)(OffsT(GetU64() >> 1))); } - return reinterpret_cast(Ptr); + return GetPtr(); } /// Retrieve the address of the AST node pointer. Deserializes the pointee if /// necessary. T **getAddressOfPointer(ExternalASTSource *Source) const { // Ensure the integer is in pointer form. (void)get(Source); - return reinterpret_cast(&Ptr); + return &GetPtr(); } }; /// A lazy value (of type T) that is within an AST node of type Owner, /// where the value might change in later generations of the external AST /// source. template struct LazyGenerationalUpdatePtr { /// A cache of the value of this pointer, in the most recent generation in /// which we queried it. struct LazyData { ExternalASTSource *ExternalSource; uint32_t LastGeneration = 0; T LastValue; LazyData(ExternalASTSource *Source, T Value) : ExternalSource(Source), LastValue(Value) {} }; // Our value is represented as simply T if there is no external AST source. using ValueType = llvm::PointerUnion; ValueType Value; LazyGenerationalUpdatePtr(ValueType V) : Value(V) {} // Defined in ASTContext.h static ValueType makeValue(const ASTContext &Ctx, T Value); public: explicit LazyGenerationalUpdatePtr(const ASTContext &Ctx, T Value = T()) : Value(makeValue(Ctx, Value)) {} /// Create a pointer that is not potentially updated by later generations of /// the external AST source. enum NotUpdatedTag { NotUpdated }; LazyGenerationalUpdatePtr(NotUpdatedTag, T Value = T()) : Value(Value) {} /// Forcibly set this pointer (which must be lazy) as needing updates. void markIncomplete() { Value.template get()->LastGeneration = 0; } /// Set the value of this pointer, in the current generation. void set(T NewValue) { if (auto *LazyVal = Value.template dyn_cast()) { LazyVal->LastValue = NewValue; return; } Value = NewValue; } /// Set the value of this pointer, for this and all future generations. void setNotUpdated(T NewValue) { Value = NewValue; } /// Get the value of this pointer, updating its owner if necessary. T get(Owner O) { if (auto *LazyVal = Value.template dyn_cast()) { if (LazyVal->LastGeneration != LazyVal->ExternalSource->getGeneration()) { LazyVal->LastGeneration = LazyVal->ExternalSource->getGeneration(); (LazyVal->ExternalSource->*Update)(O); } return LazyVal->LastValue; } return Value.template get(); } /// Get the most recently computed value of this pointer without updating it. T getNotUpdated() const { if (auto *LazyVal = Value.template dyn_cast()) return LazyVal->LastValue; return Value.template get(); } void *getOpaqueValue() { return Value.getOpaqueValue(); } static LazyGenerationalUpdatePtr getFromOpaqueValue(void *Ptr) { return LazyGenerationalUpdatePtr(ValueType::getFromOpaqueValue(Ptr)); } }; } // namespace clang namespace llvm { /// Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be /// placed into a PointerUnion. template struct PointerLikeTypeTraits< clang::LazyGenerationalUpdatePtr> { using Ptr = clang::LazyGenerationalUpdatePtr; static void *getAsVoidPointer(Ptr P) { return P.getOpaqueValue(); } static Ptr getFromVoidPointer(void *P) { return Ptr::getFromOpaqueValue(P); } static constexpr int NumLowBitsAvailable = PointerLikeTypeTraits::NumLowBitsAvailable - 1; }; } // namespace llvm namespace clang { /// Represents a lazily-loaded vector of data. 
/// /// The lazily-loaded vector of data contains data that is partially loaded /// from an external source and partially added by local translation. The /// items loaded from the external source are loaded lazily, when needed for /// iteration over the complete vector. template&), unsigned LoadedStorage = 2, unsigned LocalStorage = 4> class LazyVector { SmallVector Loaded; SmallVector Local; public: /// Iteration over the elements in the vector. /// /// In a complete iteration, the iterator walks the range [-M, N), /// where negative values are used to indicate elements /// loaded from the external source while non-negative values are used to /// indicate elements added via \c push_back(). /// However, to provide iteration in source order (for, e.g., chained /// precompiled headers), dereferencing the iterator flips the negative /// values (corresponding to loaded entities), so that position -M /// corresponds to element 0 in the loaded entities vector, position -M+1 /// corresponds to element 1 in the loaded entities vector, etc. This /// gives us a reasonably efficient, source-order walk. /// /// We define this as a wrapping iterator around an int. The /// iterator_adaptor_base class forwards the iterator methods to basic integer /// arithmetic. class iterator : public llvm::iterator_adaptor_base< iterator, int, std::random_access_iterator_tag, T, int, T *, T &> { friend class LazyVector; LazyVector *Self; iterator(LazyVector *Self, int Position) : iterator::iterator_adaptor_base(Position), Self(Self) {} bool isLoaded() const { return this->I < 0; } public: iterator() : iterator(nullptr, 0) {} typename iterator::reference operator*() const { if (isLoaded()) return Self->Loaded.end()[this->I]; return Self->Local.begin()[this->I]; } }; iterator begin(Source *source, bool LocalOnly = false) { if (LocalOnly) return iterator(this, 0); if (source) (source->*Loader)(Loaded); return iterator(this, -(int)Loaded.size()); } iterator end() { return iterator(this, Local.size()); } void push_back(const T& LocalValue) { Local.push_back(LocalValue); } void erase(iterator From, iterator To) { if (From.isLoaded() && To.isLoaded()) { Loaded.erase(&*From, &*To); return; } if (From.isLoaded()) { Loaded.erase(&*From, Loaded.end()); From = begin(nullptr, true); } Local.erase(&*From, &*To); } }; /// A lazy pointer to a statement. using LazyDeclStmtPtr = LazyOffsetPtr; /// A lazy pointer to a declaration. using LazyDeclPtr = LazyOffsetPtr; /// A lazy pointer to a set of CXXCtorInitializers. using LazyCXXCtorInitializersPtr = LazyOffsetPtr; /// A lazy pointer to a set of CXXBaseSpecifiers. using LazyCXXBaseSpecifiersPtr = LazyOffsetPtr; } // namespace clang #endif // LLVM_CLANG_AST_EXTERNALASTSOURCE_H diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/ARM.cpp b/contrib/llvm-project/clang/lib/Basic/Targets/ARM.cpp index 7423626d7c3c..e55feedbd5c6 100644 --- a/contrib/llvm-project/clang/lib/Basic/Targets/ARM.cpp +++ b/contrib/llvm-project/clang/lib/Basic/Targets/ARM.cpp @@ -1,1510 +1,1512 @@ //===--- ARM.cpp - Implement ARM target feature support -------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements ARM TargetInfo objects. 
// //===----------------------------------------------------------------------===// #include "ARM.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/TargetBuiltins.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/TargetParser/ARMTargetParser.h" using namespace clang; using namespace clang::targets; void ARMTargetInfo::setABIAAPCS() { IsAAPCS = true; DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 64; BFloat16Width = BFloat16Align = 16; BFloat16Format = &llvm::APFloat::BFloat(); const llvm::Triple &T = getTriple(); bool IsNetBSD = T.isOSNetBSD(); bool IsOpenBSD = T.isOSOpenBSD(); if (!T.isOSWindows() && !IsNetBSD && !IsOpenBSD) WCharType = UnsignedInt; UseBitFieldTypeAlignment = true; ZeroLengthBitfieldBoundary = 0; // Thumb1 add sp, #imm requires the immediate value be multiple of 4, // so set preferred for small types to 32. if (T.isOSBinFormatMachO()) { resetDataLayout(BigEndian ? "E-m:o-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" : "e-m:o-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64", "_"); } else if (T.isOSWindows()) { assert(!BigEndian && "Windows on ARM does not support big endian"); resetDataLayout("e" "-m:w" "-p:32:32" "-Fi8" "-i64:64" "-v128:64:128" "-a:0:32" "-n32" "-S64"); } else if (T.isOSNaCl()) { assert(!BigEndian && "NaCl on ARM does not support big endian"); resetDataLayout("e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S128"); } else { resetDataLayout(BigEndian ? "E-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" : "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"); } // FIXME: Enumerated types are variable width in straight AAPCS. } void ARMTargetInfo::setABIAPCS(bool IsAAPCS16) { const llvm::Triple &T = getTriple(); IsAAPCS = false; if (IsAAPCS16) DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 64; else DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 32; BFloat16Width = BFloat16Align = 16; BFloat16Format = &llvm::APFloat::BFloat(); WCharType = SignedInt; // Do not respect the alignment of bit-field types when laying out // structures. This corresponds to PCC_BITFIELD_TYPE_MATTERS in gcc. UseBitFieldTypeAlignment = false; /// gcc forces the alignment to 4 bytes, regardless of the type of the /// zero length bitfield. This corresponds to EMPTY_FIELD_BOUNDARY in /// gcc. ZeroLengthBitfieldBoundary = 32; if (T.isOSBinFormatMachO() && IsAAPCS16) { assert(!BigEndian && "AAPCS16 does not support big-endian"); resetDataLayout("e-m:o-p:32:32-Fi8-i64:64-a:0:32-n32-S128", "_"); } else if (T.isOSBinFormatMachO()) resetDataLayout( BigEndian ? "E-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" : "e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32", "_"); else resetDataLayout( BigEndian ? "E-m:e-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" : "e-m:e-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"); // FIXME: Override "preferred align" for double and long long. 
} void ARMTargetInfo::setArchInfo() { StringRef ArchName = getTriple().getArchName(); ArchISA = llvm::ARM::parseArchISA(ArchName); CPU = std::string(llvm::ARM::getDefaultCPU(ArchName)); llvm::ARM::ArchKind AK = llvm::ARM::parseArch(ArchName); if (AK != llvm::ARM::ArchKind::INVALID) ArchKind = AK; setArchInfo(ArchKind); } void ARMTargetInfo::setArchInfo(llvm::ARM::ArchKind Kind) { StringRef SubArch; // cache TargetParser info ArchKind = Kind; SubArch = llvm::ARM::getSubArch(ArchKind); ArchProfile = llvm::ARM::parseArchProfile(SubArch); ArchVersion = llvm::ARM::parseArchVersion(SubArch); // cache CPU related strings CPUAttr = getCPUAttr(); CPUProfile = getCPUProfile(); } void ARMTargetInfo::setAtomic() { // when triple does not specify a sub arch, // then we are not using inline atomics bool ShouldUseInlineAtomic = (ArchISA == llvm::ARM::ISAKind::ARM && ArchVersion >= 6) || (ArchISA == llvm::ARM::ISAKind::THUMB && ArchVersion >= 7); // Cortex M does not support 8 byte atomics, while general Thumb2 does. if (ArchProfile == llvm::ARM::ProfileKind::M) { MaxAtomicPromoteWidth = 32; if (ShouldUseInlineAtomic) MaxAtomicInlineWidth = 32; } else { MaxAtomicPromoteWidth = 64; if (ShouldUseInlineAtomic) MaxAtomicInlineWidth = 64; } } bool ARMTargetInfo::hasMVE() const { return ArchKind == llvm::ARM::ArchKind::ARMV8_1MMainline && MVE != 0; } bool ARMTargetInfo::hasMVEFloat() const { return hasMVE() && (MVE & MVE_FP); } bool ARMTargetInfo::hasCDE() const { return getARMCDECoprocMask() != 0; } bool ARMTargetInfo::isThumb() const { return ArchISA == llvm::ARM::ISAKind::THUMB; } bool ARMTargetInfo::supportsThumb() const { return CPUAttr.count('T') || ArchVersion >= 6; } bool ARMTargetInfo::supportsThumb2() const { return CPUAttr == "6T2" || (ArchVersion >= 7 && CPUAttr != "8M_BASE"); } StringRef ARMTargetInfo::getCPUAttr() const { // For most sub-arches, the build attribute CPU name is enough. // For Cortex variants, it's slightly different. 
switch (ArchKind) { default: return llvm::ARM::getCPUAttr(ArchKind); case llvm::ARM::ArchKind::ARMV6M: return "6M"; case llvm::ARM::ArchKind::ARMV7S: return "7S"; case llvm::ARM::ArchKind::ARMV7A: return "7A"; case llvm::ARM::ArchKind::ARMV7R: return "7R"; case llvm::ARM::ArchKind::ARMV7M: return "7M"; case llvm::ARM::ArchKind::ARMV7EM: return "7EM"; case llvm::ARM::ArchKind::ARMV7VE: return "7VE"; case llvm::ARM::ArchKind::ARMV8A: return "8A"; case llvm::ARM::ArchKind::ARMV8_1A: return "8_1A"; case llvm::ARM::ArchKind::ARMV8_2A: return "8_2A"; case llvm::ARM::ArchKind::ARMV8_3A: return "8_3A"; case llvm::ARM::ArchKind::ARMV8_4A: return "8_4A"; case llvm::ARM::ArchKind::ARMV8_5A: return "8_5A"; case llvm::ARM::ArchKind::ARMV8_6A: return "8_6A"; case llvm::ARM::ArchKind::ARMV8_7A: return "8_7A"; case llvm::ARM::ArchKind::ARMV8_8A: return "8_8A"; case llvm::ARM::ArchKind::ARMV8_9A: return "8_9A"; case llvm::ARM::ArchKind::ARMV9A: return "9A"; case llvm::ARM::ArchKind::ARMV9_1A: return "9_1A"; case llvm::ARM::ArchKind::ARMV9_2A: return "9_2A"; case llvm::ARM::ArchKind::ARMV9_3A: return "9_3A"; case llvm::ARM::ArchKind::ARMV9_4A: return "9_4A"; case llvm::ARM::ArchKind::ARMV9_5A: return "9_5A"; case llvm::ARM::ArchKind::ARMV8MBaseline: return "8M_BASE"; case llvm::ARM::ArchKind::ARMV8MMainline: return "8M_MAIN"; case llvm::ARM::ArchKind::ARMV8R: return "8R"; case llvm::ARM::ArchKind::ARMV8_1MMainline: return "8_1M_MAIN"; } } StringRef ARMTargetInfo::getCPUProfile() const { switch (ArchProfile) { case llvm::ARM::ProfileKind::A: return "A"; case llvm::ARM::ProfileKind::R: return "R"; case llvm::ARM::ProfileKind::M: return "M"; default: return ""; } } ARMTargetInfo::ARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : TargetInfo(Triple), FPMath(FP_Default), IsAAPCS(true), LDREX(0), HW_FP(0) { bool IsFreeBSD = Triple.isOSFreeBSD(); bool IsOpenBSD = Triple.isOSOpenBSD(); bool IsNetBSD = Triple.isOSNetBSD(); bool IsHaiku = Triple.isOSHaiku(); bool IsOHOS = Triple.isOHOSFamily(); // FIXME: the isOSBinFormatMachO is a workaround for identifying a Darwin-like // environment where size_t is `unsigned long` rather than `unsigned int` PtrDiffType = IntPtrType = (Triple.isOSDarwin() || Triple.isOSBinFormatMachO() || IsOpenBSD || IsNetBSD) ? SignedLong : SignedInt; SizeType = (Triple.isOSDarwin() || Triple.isOSBinFormatMachO() || IsOpenBSD || IsNetBSD) ? UnsignedLong : UnsignedInt; // ptrdiff_t is inconsistent on Darwin if ((Triple.isOSDarwin() || Triple.isOSBinFormatMachO()) && !Triple.isWatchABI()) PtrDiffType = SignedInt; // Cache arch related info. setArchInfo(); // {} in inline assembly are neon specifiers, not assembly variant // specifiers. NoAsmVariants = true; // FIXME: This duplicates code from the driver that sets the -target-abi // option - this code is used if -target-abi isn't passed and should // be unified in some way. if (Triple.isOSBinFormatMachO()) { // The backend is hardwired to assume AAPCS for M-class processors, ensure // the frontend matches that. if (Triple.getEnvironment() == llvm::Triple::EABI || Triple.getOS() == llvm::Triple::UnknownOS || ArchProfile == llvm::ARM::ProfileKind::M) { setABI("aapcs"); } else if (Triple.isWatchABI()) { setABI("aapcs16"); } else { setABI("apcs-gnu"); } } else if (Triple.isOSWindows()) { // FIXME: this is invalid for WindowsCE setABI("aapcs"); } else { // Select the default based on the platform. 
switch (Triple.getEnvironment()) { case llvm::Triple::Android: case llvm::Triple::GNUEABI: + case llvm::Triple::GNUEABIT64: case llvm::Triple::GNUEABIHF: + case llvm::Triple::GNUEABIHFT64: case llvm::Triple::MuslEABI: case llvm::Triple::MuslEABIHF: case llvm::Triple::OpenHOS: setABI("aapcs-linux"); break; case llvm::Triple::EABIHF: case llvm::Triple::EABI: setABI("aapcs"); break; case llvm::Triple::GNU: setABI("apcs-gnu"); break; default: if (IsNetBSD) setABI("apcs-gnu"); else if (IsFreeBSD || IsOpenBSD || IsHaiku || IsOHOS) setABI("aapcs-linux"); else setABI("aapcs"); break; } } // ARM targets default to using the ARM C++ ABI. TheCXXABI.set(TargetCXXABI::GenericARM); // ARM has atomics up to 8 bytes setAtomic(); // Maximum alignment for ARM NEON data types should be 64-bits (AAPCS) // as well the default alignment if (IsAAPCS && !Triple.isAndroid()) DefaultAlignForAttributeAligned = MaxVectorAlign = 64; // Do force alignment of members that follow zero length bitfields. If // the alignment of the zero-length bitfield is greater than the member // that follows it, `bar', `bar' will be aligned as the type of the // zero length bitfield. UseZeroLengthBitfieldAlignment = true; if (Triple.getOS() == llvm::Triple::Linux || Triple.getOS() == llvm::Triple::UnknownOS) this->MCountName = Opts.EABIVersion == llvm::EABI::GNU ? "llvm.arm.gnu.eabi.mcount" : "\01mcount"; SoftFloatABI = llvm::is_contained(Opts.FeaturesAsWritten, "+soft-float-abi"); } StringRef ARMTargetInfo::getABI() const { return ABI; } bool ARMTargetInfo::setABI(const std::string &Name) { ABI = Name; // The defaults (above) are for AAPCS, check if we need to change them. // // FIXME: We need support for -meabi... we could just mangle it into the // name. if (Name == "apcs-gnu" || Name == "aapcs16") { setABIAPCS(Name == "aapcs16"); return true; } if (Name == "aapcs" || Name == "aapcs-vfp" || Name == "aapcs-linux") { setABIAAPCS(); return true; } return false; } bool ARMTargetInfo::isBranchProtectionSupportedArch(StringRef Arch) const { llvm::ARM::ArchKind CPUArch = llvm::ARM::parseCPUArch(Arch); if (CPUArch == llvm::ARM::ArchKind::INVALID) CPUArch = llvm::ARM::parseArch(getTriple().getArchName()); if (CPUArch == llvm::ARM::ArchKind::INVALID) return false; StringRef ArchFeature = llvm::ARM::getArchName(CPUArch); auto a = llvm::Triple(ArchFeature, getTriple().getVendorName(), getTriple().getOSName(), getTriple().getEnvironmentName()); StringRef SubArch = llvm::ARM::getSubArch(CPUArch); llvm::ARM::ProfileKind Profile = llvm::ARM::parseArchProfile(SubArch); return a.isArmT32() && (Profile == llvm::ARM::ProfileKind::M); } bool ARMTargetInfo::validateBranchProtection(StringRef Spec, StringRef Arch, BranchProtectionInfo &BPI, StringRef &Err) const { llvm::ARM::ParsedBranchProtection PBP; if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err)) return false; if (!isBranchProtectionSupportedArch(Arch)) return false; BPI.SignReturnAddr = llvm::StringSwitch(PBP.Scope) .Case("non-leaf", LangOptions::SignReturnAddressScopeKind::NonLeaf) .Case("all", LangOptions::SignReturnAddressScopeKind::All) .Default(LangOptions::SignReturnAddressScopeKind::None); // Don't care for the sign key, beyond issuing a warning. if (PBP.Key == "b_key") Err = "b-key"; BPI.SignKey = LangOptions::SignReturnAddressKeyKind::AKey; BPI.BranchTargetEnforcement = PBP.BranchTargetEnforcement; BPI.BranchProtectionPAuthLR = PBP.BranchProtectionPAuthLR; return true; } // FIXME: This should be based on Arch attributes, not CPU names. 
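For the environment switch near the top of this constructor, the newly added GNUEABIT64 and GNUEABIHFT64 cases fall into the same bucket as the other GNU/musl EABI environments. The condensed sketch below mirrors that mapping as a free function for illustration only; it is not a drop-in replacement, and reading the two new environments as 64-bit time_t variants of gnueabi/gnueabihf is an assumption.

#include "llvm/TargetParser/Triple.h"
#include <string>

// Default ARM ABI for ELF targets, keyed on the triple environment only
// (the real code also consults the OS, e.g. NetBSD, FreeBSD, Haiku).
static std::string defaultARMABI(llvm::Triple::EnvironmentType Env) {
  switch (Env) {
  case llvm::Triple::Android:
  case llvm::Triple::GNUEABI:
  case llvm::Triple::GNUEABIT64:   // added by this change
  case llvm::Triple::GNUEABIHF:
  case llvm::Triple::GNUEABIHFT64: // added by this change
  case llvm::Triple::MuslEABI:
  case llvm::Triple::MuslEABIHF:
  case llvm::Triple::OpenHOS:
    return "aapcs-linux";
  case llvm::Triple::EABIHF:
  case llvm::Triple::EABI:
    return "aapcs";
  case llvm::Triple::GNU:
    return "apcs-gnu";
  default:
    return "aapcs";
  }
}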
bool ARMTargetInfo::initFeatureMap( llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, const std::vector &FeaturesVec) const { std::string ArchFeature; std::vector TargetFeatures; llvm::ARM::ArchKind Arch = llvm::ARM::parseArch(getTriple().getArchName()); // Map the base architecture to an appropriate target feature, so we don't // rely on the target triple. llvm::ARM::ArchKind CPUArch = llvm::ARM::parseCPUArch(CPU); if (CPUArch == llvm::ARM::ArchKind::INVALID) CPUArch = Arch; if (CPUArch != llvm::ARM::ArchKind::INVALID) { ArchFeature = ("+" + llvm::ARM::getArchName(CPUArch)).str(); TargetFeatures.push_back(ArchFeature); // These features are added to allow arm_neon.h target(..) attributes to // match with both arm and aarch64. We need to add all previous architecture // versions, so that "8.6" also allows "8.1" functions. In case of v9.x the // v8.x counterparts are added too. We only need these for anything > 8.0-A. for (llvm::ARM::ArchKind I = llvm::ARM::convertV9toV8(CPUArch); I != llvm::ARM::ArchKind::INVALID; --I) Features[llvm::ARM::getSubArch(I)] = true; if (CPUArch > llvm::ARM::ArchKind::ARMV8A && CPUArch <= llvm::ARM::ArchKind::ARMV9_3A) for (llvm::ARM::ArchKind I = CPUArch; I != llvm::ARM::ArchKind::INVALID; --I) Features[llvm::ARM::getSubArch(I)] = true; } // get default FPU features llvm::ARM::FPUKind FPUKind = llvm::ARM::getDefaultFPU(CPU, Arch); llvm::ARM::getFPUFeatures(FPUKind, TargetFeatures); // get default Extension features uint64_t Extensions = llvm::ARM::getDefaultExtensions(CPU, Arch); llvm::ARM::getExtensionFeatures(Extensions, TargetFeatures); for (auto Feature : TargetFeatures) if (Feature[0] == '+') Features[Feature.drop_front(1)] = true; // Enable or disable thumb-mode explicitly per function to enable mixed // ARM and Thumb code generation. if (isThumb()) Features["thumb-mode"] = true; else Features["thumb-mode"] = false; // Convert user-provided arm and thumb GNU target attributes to // [-|+]thumb-mode target features respectively. std::vector UpdatedFeaturesVec; for (const auto &Feature : FeaturesVec) { // Skip soft-float-abi; it's something we only use to initialize a bit of // class state, and is otherwise unrecognized. if (Feature == "+soft-float-abi") continue; StringRef FixedFeature; if (Feature == "+arm") FixedFeature = "-thumb-mode"; else if (Feature == "+thumb") FixedFeature = "+thumb-mode"; else FixedFeature = Feature; UpdatedFeaturesVec.push_back(FixedFeature.str()); } return TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec); } bool ARMTargetInfo::handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) { FPU = 0; MVE = 0; CRC = 0; Crypto = 0; SHA2 = 0; AES = 0; DSP = 0; HasUnalignedAccess = true; SoftFloat = false; // Note that SoftFloatABI is initialized in our constructor. HWDiv = 0; DotProd = 0; HasMatMul = 0; HasPAC = 0; HasBTI = 0; HasFloat16 = true; ARMCDECoprocMask = 0; HasBFloat16 = false; HasFullBFloat16 = false; FPRegsDisabled = false; // This does not diagnose illegal cases like having both // "+vfpv2" and "+vfpv3" or having "+neon" and "-fp64". 
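The tail of initFeatureMap above rewrites the GNU-style "arm"/"thumb" target attributes into the -thumb-mode / +thumb-mode subtarget features. As a usage illustration (typical source-level form, not taken from the patch), this is the attribute syntax that produces those "+arm"/"+thumb" feature strings:

// Each function can select its own instruction set; initFeatureMap above
// translates the resulting "+arm"/"+thumb" features into thumb-mode.
__attribute__((target("arm")))   int encoded_as_arm(int x)   { return x + 1; }
__attribute__((target("thumb"))) int encoded_as_thumb(int x) { return x + 2; }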
for (const auto &Feature : Features) { if (Feature == "+soft-float") { SoftFloat = true; } else if (Feature == "+vfp2sp" || Feature == "+vfp2") { FPU |= VFP2FPU; HW_FP |= HW_FP_SP; if (Feature == "+vfp2") HW_FP |= HW_FP_DP; } else if (Feature == "+vfp3sp" || Feature == "+vfp3d16sp" || Feature == "+vfp3" || Feature == "+vfp3d16") { FPU |= VFP3FPU; HW_FP |= HW_FP_SP; if (Feature == "+vfp3" || Feature == "+vfp3d16") HW_FP |= HW_FP_DP; } else if (Feature == "+vfp4sp" || Feature == "+vfp4d16sp" || Feature == "+vfp4" || Feature == "+vfp4d16") { FPU |= VFP4FPU; HW_FP |= HW_FP_SP | HW_FP_HP; if (Feature == "+vfp4" || Feature == "+vfp4d16") HW_FP |= HW_FP_DP; } else if (Feature == "+fp-armv8sp" || Feature == "+fp-armv8d16sp" || Feature == "+fp-armv8" || Feature == "+fp-armv8d16") { FPU |= FPARMV8; HW_FP |= HW_FP_SP | HW_FP_HP; if (Feature == "+fp-armv8" || Feature == "+fp-armv8d16") HW_FP |= HW_FP_DP; } else if (Feature == "+neon") { FPU |= NeonFPU; HW_FP |= HW_FP_SP; } else if (Feature == "+hwdiv") { HWDiv |= HWDivThumb; } else if (Feature == "+hwdiv-arm") { HWDiv |= HWDivARM; } else if (Feature == "+crc") { CRC = 1; } else if (Feature == "+crypto") { Crypto = 1; } else if (Feature == "+sha2") { SHA2 = 1; } else if (Feature == "+aes") { AES = 1; } else if (Feature == "+dsp") { DSP = 1; } else if (Feature == "+fp64") { HW_FP |= HW_FP_DP; } else if (Feature == "+8msecext") { if (CPUProfile != "M" || ArchVersion != 8) { Diags.Report(diag::err_target_unsupported_mcmse) << CPU; return false; } } else if (Feature == "+strict-align") { HasUnalignedAccess = false; } else if (Feature == "+fp16") { HW_FP |= HW_FP_HP; } else if (Feature == "+fullfp16") { HasLegalHalfType = true; } else if (Feature == "+dotprod") { DotProd = true; } else if (Feature == "+mve") { MVE |= MVE_INT; } else if (Feature == "+mve.fp") { HasLegalHalfType = true; FPU |= FPARMV8; MVE |= MVE_INT | MVE_FP; HW_FP |= HW_FP_SP | HW_FP_HP; } else if (Feature == "+i8mm") { HasMatMul = 1; } else if (Feature.size() == strlen("+cdecp0") && Feature >= "+cdecp0" && Feature <= "+cdecp7") { unsigned Coproc = Feature.back() - '0'; ARMCDECoprocMask |= (1U << Coproc); } else if (Feature == "+bf16") { HasBFloat16 = true; } else if (Feature == "-fpregs") { FPRegsDisabled = true; } else if (Feature == "+pacbti") { HasPAC = 1; HasBTI = 1; } else if (Feature == "+fullbf16") { HasFullBFloat16 = true; } } HalfArgsAndReturns = true; switch (ArchVersion) { case 6: if (ArchProfile == llvm::ARM::ProfileKind::M) LDREX = 0; else if (ArchKind == llvm::ARM::ArchKind::ARMV6K) LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B; else LDREX = LDREX_W; break; case 7: if (ArchProfile == llvm::ARM::ProfileKind::M) LDREX = LDREX_W | LDREX_H | LDREX_B; else LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B; break; case 8: case 9: LDREX = LDREX_D | LDREX_W | LDREX_H | LDREX_B; } if (!(FPU & NeonFPU) && FPMath == FP_Neon) { Diags.Report(diag::err_target_unsupported_fpmath) << "neon"; return false; } if (FPMath == FP_Neon) Features.push_back("+neonfp"); else if (FPMath == FP_VFP) Features.push_back("-neonfp"); return true; } bool ARMTargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch(Feature) .Case("arm", true) .Case("aarch32", true) .Case("softfloat", SoftFloat) .Case("thumb", isThumb()) .Case("neon", (FPU & NeonFPU) && !SoftFloat) .Case("vfp", FPU && !SoftFloat) .Case("hwdiv", HWDiv & HWDivThumb) .Case("hwdiv-arm", HWDiv & HWDivARM) .Case("mve", hasMVE()) .Default(false); } bool ARMTargetInfo::hasBFloat16Type() const { // The __bf16 type is generally available so 
long as we have any fp registers. return HasBFloat16 || (FPU && !SoftFloat); } bool ARMTargetInfo::isValidCPUName(StringRef Name) const { return Name == "generic" || llvm::ARM::parseCPUArch(Name) != llvm::ARM::ArchKind::INVALID; } void ARMTargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { llvm::ARM::fillValidCPUArchList(Values); } bool ARMTargetInfo::setCPU(const std::string &Name) { if (Name != "generic") setArchInfo(llvm::ARM::parseCPUArch(Name)); if (ArchKind == llvm::ARM::ArchKind::INVALID) return false; setAtomic(); CPU = Name; return true; } bool ARMTargetInfo::setFPMath(StringRef Name) { if (Name == "neon") { FPMath = FP_Neon; return true; } else if (Name == "vfp" || Name == "vfp2" || Name == "vfp3" || Name == "vfp4") { FPMath = FP_VFP; return true; } return false; } void ARMTargetInfo::getTargetDefinesARMV81A(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__ARM_FEATURE_QRDMX", "1"); } void ARMTargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts, MacroBuilder &Builder) const { // Also include the ARMv8.1-A defines getTargetDefinesARMV81A(Opts, Builder); } void ARMTargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts, MacroBuilder &Builder) const { // Also include the ARMv8.2-A defines Builder.defineMacro("__ARM_FEATURE_COMPLEX", "1"); getTargetDefinesARMV82A(Opts, Builder); } void ARMTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { // Target identification. Builder.defineMacro("__arm"); Builder.defineMacro("__arm__"); // For bare-metal none-eabi. if (getTriple().getOS() == llvm::Triple::UnknownOS && (getTriple().getEnvironment() == llvm::Triple::EABI || getTriple().getEnvironment() == llvm::Triple::EABIHF) && Opts.CPlusPlus) { Builder.defineMacro("_GNU_SOURCE"); } // Target properties. Builder.defineMacro("__REGISTER_PREFIX__", ""); // Unfortunately, __ARM_ARCH_7K__ is now more of an ABI descriptor. The CPU // happens to be Cortex-A7 though, so it should still get __ARM_ARCH_7A__. if (getTriple().isWatchABI()) Builder.defineMacro("__ARM_ARCH_7K__", "2"); if (!CPUAttr.empty()) Builder.defineMacro("__ARM_ARCH_" + CPUAttr + "__"); // ACLE 6.4.1 ARM/Thumb instruction set architecture // __ARM_ARCH is defined as an integer value indicating the current ARM ISA Builder.defineMacro("__ARM_ARCH", Twine(ArchVersion)); if (ArchVersion >= 8) { // ACLE 6.5.7 Crypto Extension // The __ARM_FEATURE_CRYPTO is deprecated in favor of finer grained // feature macros for AES and SHA2 if (SHA2 && AES) Builder.defineMacro("__ARM_FEATURE_CRYPTO", "1"); if (SHA2) Builder.defineMacro("__ARM_FEATURE_SHA2", "1"); if (AES) Builder.defineMacro("__ARM_FEATURE_AES", "1"); // ACLE 6.5.8 CRC32 Extension if (CRC) Builder.defineMacro("__ARM_FEATURE_CRC32", "1"); // ACLE 6.5.10 Numeric Maximum and Minimum Builder.defineMacro("__ARM_FEATURE_NUMERIC_MAXMIN", "1"); // ACLE 6.5.9 Directed Rounding Builder.defineMacro("__ARM_FEATURE_DIRECTED_ROUNDING", "1"); } // __ARM_ARCH_ISA_ARM is defined to 1 if the core supports the ARM ISA. It // is not defined for the M-profile. // NOTE that the default profile is assumed to be 'A' if (CPUProfile.empty() || ArchProfile != llvm::ARM::ProfileKind::M) Builder.defineMacro("__ARM_ARCH_ISA_ARM", "1"); // __ARM_ARCH_ISA_THUMB is defined to 1 if the core supports the original // Thumb ISA (including v6-M and v8-M Baseline). It is set to 2 if the // core supports the Thumb-2 ISA as found in the v6T2 architecture and all // v7 and v8 architectures excluding v8-M Baseline. 
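Since getTargetDefines is where the ACLE feature-test macros are emitted, a short consumer-side example may help; the particular macros tested below are illustrative, and the compile command is only an example invocation.

#include <cstdio>

// Compile for an ARM target to see the ACLE macros in action, e.g.
//   clang --target=armv7a-linux-gnueabihf -c acle_probe.cpp
void acle_probe() {
#if defined(__ARM_ARCH)
  std::printf("__ARM_ARCH           = %d\n", __ARM_ARCH);
#endif
#if defined(__ARM_ARCH_ISA_THUMB)
  std::printf("__ARM_ARCH_ISA_THUMB = %d\n", __ARM_ARCH_ISA_THUMB);
#endif
#if defined(__ARM_FEATURE_CRC32)
  std::printf("CRC32 instructions available\n");
#endif
#if defined(__ARM_FP)
  std::printf("__ARM_FP             = 0x%x\n", (unsigned)__ARM_FP);
#endif
}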
if (supportsThumb2()) Builder.defineMacro("__ARM_ARCH_ISA_THUMB", "2"); else if (supportsThumb()) Builder.defineMacro("__ARM_ARCH_ISA_THUMB", "1"); // __ARM_32BIT_STATE is defined to 1 if code is being generated for a 32-bit // instruction set such as ARM or Thumb. Builder.defineMacro("__ARM_32BIT_STATE", "1"); // ACLE 6.4.2 Architectural Profile (A, R, M or pre-Cortex) // __ARM_ARCH_PROFILE is defined as 'A', 'R', 'M' or 'S', or unset. if (!CPUProfile.empty()) Builder.defineMacro("__ARM_ARCH_PROFILE", "'" + CPUProfile + "'"); // ACLE 6.4.3 Unaligned access supported in hardware if (HasUnalignedAccess) Builder.defineMacro("__ARM_FEATURE_UNALIGNED", "1"); // ACLE 6.4.4 LDREX/STREX if (LDREX) Builder.defineMacro("__ARM_FEATURE_LDREX", "0x" + Twine::utohexstr(LDREX)); // ACLE 6.4.5 CLZ if (ArchVersion == 5 || (ArchVersion == 6 && CPUProfile != "M") || ArchVersion > 6) Builder.defineMacro("__ARM_FEATURE_CLZ", "1"); // ACLE 6.5.1 Hardware Floating Point if (HW_FP) Builder.defineMacro("__ARM_FP", "0x" + Twine::utohexstr(HW_FP)); // ACLE predefines. Builder.defineMacro("__ARM_ACLE", "200"); // FP16 support (we currently only support IEEE format). Builder.defineMacro("__ARM_FP16_FORMAT_IEEE", "1"); Builder.defineMacro("__ARM_FP16_ARGS", "1"); // ACLE 6.5.3 Fused multiply-accumulate (FMA) if (ArchVersion >= 7 && (FPU & VFP4FPU)) Builder.defineMacro("__ARM_FEATURE_FMA", "1"); // Subtarget options. // FIXME: It's more complicated than this and we don't really support // interworking. // Windows on ARM does not "support" interworking if (5 <= ArchVersion && ArchVersion <= 8 && !getTriple().isOSWindows()) Builder.defineMacro("__THUMB_INTERWORK__"); if (ABI == "aapcs" || ABI == "aapcs-linux" || ABI == "aapcs-vfp") { // Embedded targets on Darwin follow AAPCS, but not EABI. // Windows on ARM follows AAPCS VFP, but does not conform to EABI. if (!getTriple().isOSBinFormatMachO() && !getTriple().isOSWindows()) Builder.defineMacro("__ARM_EABI__"); Builder.defineMacro("__ARM_PCS", "1"); } if ((!SoftFloat && !SoftFloatABI) || ABI == "aapcs-vfp" || ABI == "aapcs16") Builder.defineMacro("__ARM_PCS_VFP", "1"); if (SoftFloat || (SoftFloatABI && !FPU)) Builder.defineMacro("__SOFTFP__"); // ACLE position independent code macros. if (Opts.ROPI) Builder.defineMacro("__ARM_ROPI", "1"); if (Opts.RWPI) Builder.defineMacro("__ARM_RWPI", "1"); // Macros for enabling co-proc intrinsics uint64_t FeatureCoprocBF = 0; switch (ArchKind) { default: break; case llvm::ARM::ArchKind::ARMV4: case llvm::ARM::ArchKind::ARMV4T: // Filter __arm_ldcl and __arm_stcl in acle.h FeatureCoprocBF = isThumb() ? 0 : FEATURE_COPROC_B1; break; case llvm::ARM::ArchKind::ARMV5T: FeatureCoprocBF = isThumb() ? 
0 : FEATURE_COPROC_B1 | FEATURE_COPROC_B2; break; case llvm::ARM::ArchKind::ARMV5TE: case llvm::ARM::ArchKind::ARMV5TEJ: if (!isThumb()) FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 | FEATURE_COPROC_B3; break; case llvm::ARM::ArchKind::ARMV6: case llvm::ARM::ArchKind::ARMV6K: case llvm::ARM::ArchKind::ARMV6KZ: case llvm::ARM::ArchKind::ARMV6T2: if (!isThumb() || ArchKind == llvm::ARM::ArchKind::ARMV6T2) FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 | FEATURE_COPROC_B3 | FEATURE_COPROC_B4; break; case llvm::ARM::ArchKind::ARMV7A: case llvm::ARM::ArchKind::ARMV7R: case llvm::ARM::ArchKind::ARMV7M: case llvm::ARM::ArchKind::ARMV7S: case llvm::ARM::ArchKind::ARMV7EM: FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 | FEATURE_COPROC_B3 | FEATURE_COPROC_B4; break; case llvm::ARM::ArchKind::ARMV8A: case llvm::ARM::ArchKind::ARMV8R: case llvm::ARM::ArchKind::ARMV8_1A: case llvm::ARM::ArchKind::ARMV8_2A: case llvm::ARM::ArchKind::ARMV8_3A: case llvm::ARM::ArchKind::ARMV8_4A: case llvm::ARM::ArchKind::ARMV8_5A: case llvm::ARM::ArchKind::ARMV8_6A: case llvm::ARM::ArchKind::ARMV8_7A: case llvm::ARM::ArchKind::ARMV8_8A: case llvm::ARM::ArchKind::ARMV8_9A: case llvm::ARM::ArchKind::ARMV9A: case llvm::ARM::ArchKind::ARMV9_1A: case llvm::ARM::ArchKind::ARMV9_2A: case llvm::ARM::ArchKind::ARMV9_3A: case llvm::ARM::ArchKind::ARMV9_4A: case llvm::ARM::ArchKind::ARMV9_5A: // Filter __arm_cdp, __arm_ldcl, __arm_stcl in arm_acle.h FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B3; break; case llvm::ARM::ArchKind::ARMV8MMainline: case llvm::ARM::ArchKind::ARMV8_1MMainline: FeatureCoprocBF = FEATURE_COPROC_B1 | FEATURE_COPROC_B2 | FEATURE_COPROC_B3 | FEATURE_COPROC_B4; break; } Builder.defineMacro("__ARM_FEATURE_COPROC", "0x" + Twine::utohexstr(FeatureCoprocBF)); if (ArchKind == llvm::ARM::ArchKind::XSCALE) Builder.defineMacro("__XSCALE__"); if (isThumb()) { Builder.defineMacro("__THUMBEL__"); Builder.defineMacro("__thumb__"); if (supportsThumb2()) Builder.defineMacro("__thumb2__"); } // ACLE 6.4.9 32-bit SIMD instructions if ((CPUProfile != "M" && ArchVersion >= 6) || (CPUProfile == "M" && DSP)) Builder.defineMacro("__ARM_FEATURE_SIMD32", "1"); // ACLE 6.4.10 Hardware Integer Divide if (((HWDiv & HWDivThumb) && isThumb()) || ((HWDiv & HWDivARM) && !isThumb())) { Builder.defineMacro("__ARM_FEATURE_IDIV", "1"); Builder.defineMacro("__ARM_ARCH_EXT_IDIV__", "1"); } // Note, this is always on in gcc, even though it doesn't make sense. Builder.defineMacro("__APCS_32__"); // __VFP_FP__ means that the floating-point format is VFP, not that a hardware // FPU is present. Moreover, the VFP format is the only one supported by // clang. For these reasons, this macro is always defined. Builder.defineMacro("__VFP_FP__"); if (FPUModeIsVFP((FPUMode)FPU)) { if (FPU & VFP2FPU) Builder.defineMacro("__ARM_VFPV2__"); if (FPU & VFP3FPU) Builder.defineMacro("__ARM_VFPV3__"); if (FPU & VFP4FPU) Builder.defineMacro("__ARM_VFPV4__"); if (FPU & FPARMV8) Builder.defineMacro("__ARM_FPV5__"); } // This only gets set when Neon instructions are actually available, unlike // the VFP define, hence the soft float and arch check. This is subtly // different from gcc, we follow the intent which was that it should be set // when Neon instructions are actually available. 
if ((FPU & NeonFPU) && !SoftFloat && ArchVersion >= 7) { Builder.defineMacro("__ARM_NEON", "1"); Builder.defineMacro("__ARM_NEON__"); // current AArch32 NEON implementations do not support double-precision // floating-point even when it is present in VFP. Builder.defineMacro("__ARM_NEON_FP", "0x" + Twine::utohexstr(HW_FP & ~HW_FP_DP)); } if (hasMVE()) { Builder.defineMacro("__ARM_FEATURE_MVE", hasMVEFloat() ? "3" : "1"); } if (hasCDE()) { Builder.defineMacro("__ARM_FEATURE_CDE", "1"); Builder.defineMacro("__ARM_FEATURE_CDE_COPROC", "0x" + Twine::utohexstr(getARMCDECoprocMask())); } Builder.defineMacro("__ARM_SIZEOF_WCHAR_T", Twine(Opts.WCharSize ? Opts.WCharSize : 4)); Builder.defineMacro("__ARM_SIZEOF_MINIMAL_ENUM", Opts.ShortEnums ? "1" : "4"); // CMSE if (ArchVersion == 8 && ArchProfile == llvm::ARM::ProfileKind::M) Builder.defineMacro("__ARM_FEATURE_CMSE", Opts.Cmse ? "3" : "1"); if (ArchVersion >= 6 && CPUAttr != "6M" && CPUAttr != "8M_BASE") { Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4"); Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); } // ACLE 6.4.7 DSP instructions if (DSP) { Builder.defineMacro("__ARM_FEATURE_DSP", "1"); } // ACLE 6.4.8 Saturation instructions bool SAT = false; if ((ArchVersion == 6 && CPUProfile != "M") || ArchVersion > 6) { Builder.defineMacro("__ARM_FEATURE_SAT", "1"); SAT = true; } // ACLE 6.4.6 Q (saturation) flag if (DSP || SAT) Builder.defineMacro("__ARM_FEATURE_QBIT", "1"); if (Opts.UnsafeFPMath) Builder.defineMacro("__ARM_FP_FAST", "1"); // Armv8.2-A FP16 vector intrinsic if ((FPU & NeonFPU) && HasLegalHalfType) Builder.defineMacro("__ARM_FEATURE_FP16_VECTOR_ARITHMETIC", "1"); // Armv8.2-A FP16 scalar intrinsics if (HasLegalHalfType) Builder.defineMacro("__ARM_FEATURE_FP16_SCALAR_ARITHMETIC", "1"); // Armv8.2-A dot product intrinsics if (DotProd) Builder.defineMacro("__ARM_FEATURE_DOTPROD", "1"); if (HasMatMul) Builder.defineMacro("__ARM_FEATURE_MATMUL_INT8", "1"); if (HasPAC) Builder.defineMacro("__ARM_FEATURE_PAUTH", "1"); if (HasBTI) Builder.defineMacro("__ARM_FEATURE_BTI", "1"); if (HasBFloat16) { Builder.defineMacro("__ARM_FEATURE_BF16", "1"); Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1"); Builder.defineMacro("__ARM_BF16_FORMAT_ALTERNATIVE", "1"); } if (Opts.BranchTargetEnforcement) Builder.defineMacro("__ARM_FEATURE_BTI_DEFAULT", "1"); if (Opts.hasSignReturnAddress()) { unsigned Value = 1; if (Opts.isSignReturnAddressScopeAll()) Value |= 1 << 2; Builder.defineMacro("__ARM_FEATURE_PAC_DEFAULT", Twine(Value)); } switch (ArchKind) { default: break; case llvm::ARM::ArchKind::ARMV8_1A: getTargetDefinesARMV81A(Opts, Builder); break; case llvm::ARM::ArchKind::ARMV8_2A: getTargetDefinesARMV82A(Opts, Builder); break; case llvm::ARM::ArchKind::ARMV8_3A: case llvm::ARM::ArchKind::ARMV8_4A: case llvm::ARM::ArchKind::ARMV8_5A: case llvm::ARM::ArchKind::ARMV8_6A: case llvm::ARM::ArchKind::ARMV8_7A: case llvm::ARM::ArchKind::ARMV8_8A: case llvm::ARM::ArchKind::ARMV8_9A: case llvm::ARM::ArchKind::ARMV9A: case llvm::ARM::ArchKind::ARMV9_1A: case llvm::ARM::ArchKind::ARMV9_2A: case llvm::ARM::ArchKind::ARMV9_3A: case llvm::ARM::ArchKind::ARMV9_4A: case llvm::ARM::ArchKind::ARMV9_5A: getTargetDefinesARMV83A(Opts, Builder); break; } } static constexpr Builtin::Info BuiltinInfo[] = { #define BUILTIN(ID, TYPE, ATTRS) \ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define LIBBUILTIN(ID, 
TYPE, ATTRS, HEADER) \ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #include "clang/Basic/BuiltinsNEON.def" #define BUILTIN(ID, TYPE, ATTRS) \ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANG}, #define LIBBUILTIN(ID, TYPE, ATTRS, HEADER) \ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \ {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS}, #include "clang/Basic/BuiltinsARM.def" }; ArrayRef ARMTargetInfo::getTargetBuiltins() const { return llvm::ArrayRef(BuiltinInfo, clang::ARM::LastTSBuiltin - Builtin::FirstTSBuiltin); } bool ARMTargetInfo::isCLZForZeroUndef() const { return false; } TargetInfo::BuiltinVaListKind ARMTargetInfo::getBuiltinVaListKind() const { return IsAAPCS ? AAPCSABIBuiltinVaList : (getTriple().isWatchABI() ? TargetInfo::CharPtrBuiltinVaList : TargetInfo::VoidPtrBuiltinVaList); } const char *const ARMTargetInfo::GCCRegNames[] = { // Integer registers "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc", // Float registers "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31", // Double registers "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", // Quad registers "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"}; ArrayRef ARMTargetInfo::getGCCRegNames() const { return llvm::ArrayRef(GCCRegNames); } const TargetInfo::GCCRegAlias ARMTargetInfo::GCCRegAliases[] = { {{"a1"}, "r0"}, {{"a2"}, "r1"}, {{"a3"}, "r2"}, {{"a4"}, "r3"}, {{"v1"}, "r4"}, {{"v2"}, "r5"}, {{"v3"}, "r6"}, {{"v4"}, "r7"}, {{"v5"}, "r8"}, {{"v6", "rfp"}, "r9"}, {{"sl"}, "r10"}, {{"fp"}, "r11"}, {{"ip"}, "r12"}, {{"r13"}, "sp"}, {{"r14"}, "lr"}, {{"r15"}, "pc"}, // The S, D and Q registers overlap, but aren't really aliases; we // don't want to substitute one of these for a different-sized one. }; ArrayRef ARMTargetInfo::getGCCRegAliases() const { return llvm::ArrayRef(GCCRegAliases); } bool ARMTargetInfo::validateAsmConstraint( const char *&Name, TargetInfo::ConstraintInfo &Info) const { switch (*Name) { default: break; case 'l': // r0-r7 if thumb, r0-r15 if ARM Info.setAllowsRegister(); return true; case 'h': // r8-r15, thumb only if (isThumb()) { Info.setAllowsRegister(); return true; } break; case 's': // An integer constant, but allowing only relocatable values. 
return true; case 't': // s0-s31, d0-d31, or q0-q15 case 'w': // s0-s15, d0-d7, or q0-q3 case 'x': // s0-s31, d0-d15, or q0-q7 if (FPRegsDisabled) return false; Info.setAllowsRegister(); return true; case 'j': // An immediate integer between 0 and 65535 (valid for MOVW) // only available in ARMv6T2 and above if (CPUAttr == "6T2" || ArchVersion >= 7) { Info.setRequiresImmediate(0, 65535); return true; } break; case 'I': if (isThumb()) { if (!supportsThumb2()) Info.setRequiresImmediate(0, 255); else // FIXME: should check if immediate value would be valid for a Thumb2 // data-processing instruction Info.setRequiresImmediate(); } else // FIXME: should check if immediate value would be valid for an ARM // data-processing instruction Info.setRequiresImmediate(); return true; case 'J': if (isThumb() && !supportsThumb2()) Info.setRequiresImmediate(-255, -1); else Info.setRequiresImmediate(-4095, 4095); return true; case 'K': if (isThumb()) { if (!supportsThumb2()) // FIXME: should check if immediate value can be obtained from shifting // a value between 0 and 255 left by any amount Info.setRequiresImmediate(); else // FIXME: should check if immediate value would be valid for a Thumb2 // data-processing instruction when inverted Info.setRequiresImmediate(); } else // FIXME: should check if immediate value would be valid for an ARM // data-processing instruction when inverted Info.setRequiresImmediate(); return true; case 'L': if (isThumb()) { if (!supportsThumb2()) Info.setRequiresImmediate(-7, 7); else // FIXME: should check if immediate value would be valid for a Thumb2 // data-processing instruction when negated Info.setRequiresImmediate(); } else // FIXME: should check if immediate value would be valid for an ARM // data-processing instruction when negated Info.setRequiresImmediate(); return true; case 'M': if (isThumb() && !supportsThumb2()) // FIXME: should check if immediate value is a multiple of 4 between 0 and // 1020 Info.setRequiresImmediate(); else // FIXME: should check if immediate value is a power of two or a integer // between 0 and 32 Info.setRequiresImmediate(); return true; case 'N': // Thumb1 only if (isThumb() && !supportsThumb2()) { Info.setRequiresImmediate(0, 31); return true; } break; case 'O': // Thumb1 only if (isThumb() && !supportsThumb2()) { // FIXME: should check if immediate value is a multiple of 4 between -508 // and 508 Info.setRequiresImmediate(); return true; } break; case 'Q': // A memory address that is a single base register. Info.setAllowsMemory(); return true; case 'T': switch (Name[1]) { default: break; case 'e': // Even general-purpose register case 'o': // Odd general-purpose register Info.setAllowsRegister(); Name++; return true; } break; case 'U': // a memory reference... switch (Name[1]) { case 'q': // ...ARMV4 ldrsb case 'v': // ...VFP load/store (reg+constant offset) case 'y': // ...iWMMXt load/store case 't': // address valid for load/store opaque types wider // than 128-bits case 'n': // valid address for Neon doubleword vector load/store case 'm': // valid address for Neon element and structure load/store case 's': // valid address for non-offset loads/stores of quad-word // values in four ARM registers Info.setAllowsMemory(); Name++; return true; } break; } return false; } std::string ARMTargetInfo::convertConstraint(const char *&Constraint) const { std::string R; switch (*Constraint) { case 'U': // Two-character constraint; add "^" hint for later parsing. 
case 'T': R = std::string("^") + std::string(Constraint, 2); Constraint++; break; case 'p': // 'p' should be translated to 'r' by default. R = std::string("r"); break; default: return std::string(1, *Constraint); } return R; } bool ARMTargetInfo::validateConstraintModifier( StringRef Constraint, char Modifier, unsigned Size, std::string &SuggestedModifier) const { bool isOutput = (Constraint[0] == '='); bool isInOut = (Constraint[0] == '+'); // Strip off constraint modifiers. Constraint = Constraint.ltrim("=+&"); switch (Constraint[0]) { default: break; case 'r': { switch (Modifier) { default: return (isInOut || isOutput || Size <= 64); case 'q': // A register of size 32 cannot fit a vector type. return false; } } } return true; } std::string_view ARMTargetInfo::getClobbers() const { // FIXME: Is this really right? return ""; } TargetInfo::CallingConvCheckResult ARMTargetInfo::checkCallingConvention(CallingConv CC) const { switch (CC) { case CC_AAPCS: case CC_AAPCS_VFP: case CC_Swift: case CC_SwiftAsync: case CC_OpenCLKernel: return CCCR_OK; default: return CCCR_Warning; } } int ARMTargetInfo::getEHDataRegisterNumber(unsigned RegNo) const { if (RegNo == 0) return 0; if (RegNo == 1) return 1; return -1; } bool ARMTargetInfo::hasSjLjLowering() const { return true; } ARMleTargetInfo::ARMleTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : ARMTargetInfo(Triple, Opts) {} void ARMleTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__ARMEL__"); ARMTargetInfo::getTargetDefines(Opts, Builder); } ARMbeTargetInfo::ARMbeTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : ARMTargetInfo(Triple, Opts) {} void ARMbeTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__ARMEB__"); Builder.defineMacro("__ARM_BIG_ENDIAN"); ARMTargetInfo::getTargetDefines(Opts, Builder); } WindowsARMTargetInfo::WindowsARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : WindowsTargetInfo(Triple, Opts), Triple(Triple) { } void WindowsARMTargetInfo::getVisualStudioDefines(const LangOptions &Opts, MacroBuilder &Builder) const { // FIXME: this is invalid for WindowsCE Builder.defineMacro("_M_ARM_NT", "1"); Builder.defineMacro("_M_ARMT", "_M_ARM"); Builder.defineMacro("_M_THUMB", "_M_ARM"); assert((Triple.getArch() == llvm::Triple::arm || Triple.getArch() == llvm::Triple::thumb) && "invalid architecture for Windows ARM target info"); unsigned Offset = Triple.getArch() == llvm::Triple::arm ? 
4 : 6; Builder.defineMacro("_M_ARM", Triple.getArchName().substr(Offset)); // TODO map the complete set of values // 31: VFPv3 40: VFPv4 Builder.defineMacro("_M_ARM_FP", "31"); } TargetInfo::BuiltinVaListKind WindowsARMTargetInfo::getBuiltinVaListKind() const { return TargetInfo::CharPtrBuiltinVaList; } TargetInfo::CallingConvCheckResult WindowsARMTargetInfo::checkCallingConvention(CallingConv CC) const { switch (CC) { case CC_X86StdCall: case CC_X86ThisCall: case CC_X86FastCall: case CC_X86VectorCall: return CCCR_Ignore; case CC_C: case CC_OpenCLKernel: case CC_PreserveMost: case CC_PreserveAll: case CC_Swift: case CC_SwiftAsync: return CCCR_OK; default: return CCCR_Warning; } } // Windows ARM + Itanium C++ ABI Target ItaniumWindowsARMleTargetInfo::ItaniumWindowsARMleTargetInfo( const llvm::Triple &Triple, const TargetOptions &Opts) : WindowsARMTargetInfo(Triple, Opts) { TheCXXABI.set(TargetCXXABI::GenericARM); } void ItaniumWindowsARMleTargetInfo::getTargetDefines( const LangOptions &Opts, MacroBuilder &Builder) const { WindowsARMTargetInfo::getTargetDefines(Opts, Builder); if (Opts.MSVCCompat) WindowsARMTargetInfo::getVisualStudioDefines(Opts, Builder); } // Windows ARM, MS (C++) ABI MicrosoftARMleTargetInfo::MicrosoftARMleTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : WindowsARMTargetInfo(Triple, Opts) { TheCXXABI.set(TargetCXXABI::Microsoft); } void MicrosoftARMleTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { WindowsARMTargetInfo::getTargetDefines(Opts, Builder); WindowsARMTargetInfo::getVisualStudioDefines(Opts, Builder); } MinGWARMTargetInfo::MinGWARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : WindowsARMTargetInfo(Triple, Opts) { TheCXXABI.set(TargetCXXABI::GenericARM); } void MinGWARMTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { WindowsARMTargetInfo::getTargetDefines(Opts, Builder); Builder.defineMacro("_ARM_"); } CygwinARMTargetInfo::CygwinARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : ARMleTargetInfo(Triple, Opts) { this->WCharType = TargetInfo::UnsignedShort; TLSSupported = false; DoubleAlign = LongLongAlign = 64; resetDataLayout("e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"); } void CygwinARMTargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { ARMleTargetInfo::getTargetDefines(Opts, Builder); Builder.defineMacro("_ARM_"); Builder.defineMacro("__CYGWIN__"); Builder.defineMacro("__CYGWIN32__"); DefineStd(Builder, "unix", Opts); if (Opts.CPlusPlus) Builder.defineMacro("_GNU_SOURCE"); } DarwinARMTargetInfo::DarwinARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : DarwinTargetInfo(Triple, Opts) { HasAlignMac68kSupport = true; if (Triple.isWatchABI()) { // Darwin on iOS uses a variant of the ARM C++ ABI. 
TheCXXABI.set(TargetCXXABI::WatchOS); // BOOL should be a real boolean on the new ABI UseSignedCharForObjCBool = false; } else TheCXXABI.set(TargetCXXABI::iOS); } void DarwinARMTargetInfo::getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const { getDarwinDefines(Builder, Opts, Triple, PlatformName, PlatformMinVersion); } RenderScript32TargetInfo::RenderScript32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : ARMleTargetInfo(llvm::Triple("armv7", Triple.getVendorName(), Triple.getOSName(), Triple.getEnvironmentName()), Opts) { IsRenderScriptTarget = true; LongWidth = LongAlign = 64; } void RenderScript32TargetInfo::getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__RENDERSCRIPT__"); ARMleTargetInfo::getTargetDefines(Opts, Builder); } diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h b/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h index 5f27c3469f86..357c1965057c 100644 --- a/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h +++ b/contrib/llvm-project/clang/lib/Basic/Targets/OSTargets.h @@ -1,1006 +1,1010 @@ //===--- OSTargets.h - Declare OS target feature support --------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file declares OS specific TargetInfo types. //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H #define LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H #include "Targets.h" namespace clang { namespace targets { template class LLVM_LIBRARY_VISIBILITY OSTargetInfo : public TgtInfo { protected: virtual void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const = 0; public: OSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : TgtInfo(Triple, Opts) {} void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override { TgtInfo::getTargetDefines(Opts, Builder); getOSDefines(Opts, TgtInfo::getTriple(), Builder); } }; void getDarwinDefines(MacroBuilder &Builder, const LangOptions &Opts, const llvm::Triple &Triple, StringRef &PlatformName, VersionTuple &PlatformMinVersion); template class LLVM_LIBRARY_VISIBILITY DarwinTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { getDarwinDefines(Builder, Opts, Triple, this->PlatformName, this->PlatformMinVersion); } public: DarwinTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { // By default, no TLS, and we list permitted architecture/OS // combinations. this->TLSSupported = false; if (Triple.isMacOSX()) this->TLSSupported = !Triple.isMacOSXVersionLT(10, 7); else if (Triple.isiOS()) { // 64-bit iOS supported it from 8 onwards, 32-bit device from 9 onwards, // 32-bit simulator from 10 onwards. 
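// Worked examples of the checks below (illustrative, not exhaustive):
//   arm64-apple-ios8.0           -> TLSSupported = true   (64-bit, >= 8)
//   armv7-apple-ios8.0           -> TLSSupported = false  (32-bit device, < 9)
//   i386-apple-ios10.0-simulator -> TLSSupported = true   (32-bit simulator, >= 10)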
if (Triple.isArch64Bit()) this->TLSSupported = !Triple.isOSVersionLT(8); else if (Triple.isArch32Bit()) { if (!Triple.isSimulatorEnvironment()) this->TLSSupported = !Triple.isOSVersionLT(9); else this->TLSSupported = !Triple.isOSVersionLT(10); } } else if (Triple.isWatchOS()) { if (!Triple.isSimulatorEnvironment()) this->TLSSupported = !Triple.isOSVersionLT(2); else this->TLSSupported = !Triple.isOSVersionLT(3); } else if (Triple.isDriverKit()) { // No TLS on DriverKit. } else if (Triple.isXROS()) this->TLSSupported = true; this->MCountName = "\01mcount"; } const char *getStaticInitSectionSpecifier() const override { // FIXME: We should return 0 when building kexts. return "__TEXT,__StaticInit,regular,pure_instructions"; } /// Darwin does not support protected visibility. Darwin's "default" /// is very similar to ELF's "protected"; Darwin requires a "weak" /// attribute on declarations that can be dynamically replaced. bool hasProtectedVisibility() const override { return false; } unsigned getExnObjectAlignment() const override { // Older versions of libc++abi guarantee an alignment of only 8-bytes for // exception objects because of a bug in __cxa_exception that was // eventually fixed in r319123. llvm::VersionTuple MinVersion; const llvm::Triple &T = this->getTriple(); // Compute the earliest OS versions that have the fix to libc++abi. switch (T.getOS()) { case llvm::Triple::Darwin: case llvm::Triple::MacOSX: // Earliest supporting version is 10.14. MinVersion = llvm::VersionTuple(10U, 14U); break; case llvm::Triple::IOS: case llvm::Triple::TvOS: // Earliest supporting version is 12.0.0. MinVersion = llvm::VersionTuple(12U); break; case llvm::Triple::WatchOS: // Earliest supporting version is 5.0.0. MinVersion = llvm::VersionTuple(5U); break; case llvm::Triple::XROS: MinVersion = llvm::VersionTuple(0); break; default: // Conservatively return 8 bytes if OS is unknown. return 64; } if (T.getOSVersion() < MinVersion) return 64; return OSTargetInfo::getExnObjectAlignment(); } TargetInfo::IntType getLeastIntTypeByWidth(unsigned BitWidth, bool IsSigned) const final { // Darwin uses `long long` for `int_least64_t` and `int_fast64_t`. return BitWidth == 64 ? (IsSigned ? 
TargetInfo::SignedLongLong : TargetInfo::UnsignedLongLong) : TargetInfo::getLeastIntTypeByWidth(BitWidth, IsSigned); } bool areDefaultedSMFStillPOD(const LangOptions &) const override { return false; } }; // DragonFlyBSD Target template class LLVM_LIBRARY_VISIBILITY DragonFlyBSDTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // DragonFly defines; list based off of gcc output Builder.defineMacro("__DragonFly__"); Builder.defineMacro("__DragonFly_cc_version", "100001"); Builder.defineMacro("__KPRINTF_ATTRIBUTE__"); Builder.defineMacro("__tune_i386__"); DefineStd(Builder, "unix", Opts); if (this->HasFloat128) Builder.defineMacro("__FLOAT128__"); } public: DragonFlyBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { switch (Triple.getArch()) { default: case llvm::Triple::x86: case llvm::Triple::x86_64: this->HasFloat128 = true; this->MCountName = ".mcount"; break; } } }; #ifndef FREEBSD_CC_VERSION #define FREEBSD_CC_VERSION 0U #endif // FreeBSD Target template class LLVM_LIBRARY_VISIBILITY FreeBSDTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // FreeBSD defines; list based off of gcc output unsigned Release = Triple.getOSMajorVersion(); if (Release == 0U) Release = 8U; unsigned CCVersion = FREEBSD_CC_VERSION; if (CCVersion == 0U) CCVersion = Release * 100000U + 1U; Builder.defineMacro("__FreeBSD__", Twine(Release)); Builder.defineMacro("__FreeBSD_cc_version", Twine(CCVersion)); Builder.defineMacro("__KPRINTF_ATTRIBUTE__"); DefineStd(Builder, "unix", Opts); if (this->HasFloat128) Builder.defineMacro("__FLOAT128__"); // On FreeBSD, wchar_t contains the number of the code point as // used by the character set of the locale. These character sets are // not necessarily a superset of ASCII. // // FIXME: This is wrong; the macro refers to the numerical values // of wchar_t *literals*, which are not locale-dependent. However, // FreeBSD systems apparently depend on us getting this wrong, and // setting this to 1 is conforming even if all the basic source // character literals have the same encoding as char and wchar_t. 
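// (Worked example for the version macros above, illustrative: a triple of
// x86_64-unknown-freebsd13.2 gives Release = 13, hence __FreeBSD__ = 13 and,
// absent an explicit FREEBSD_CC_VERSION, __FreeBSD_cc_version =
// 13 * 100000 + 1 = 1300001.)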
Builder.defineMacro("__STDC_MB_MIGHT_NEQ_WC__", "1"); } public: FreeBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { switch (Triple.getArch()) { case llvm::Triple::x86: case llvm::Triple::x86_64: this->HasFloat128 = true; [[fallthrough]]; default: this->MCountName = ".mcount"; break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::ppc: case llvm::Triple::ppcle: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: this->MCountName = "_mcount"; break; case llvm::Triple::arm: this->MCountName = "__mcount"; break; case llvm::Triple::riscv32: case llvm::Triple::riscv64: break; } } }; // GNU/kFreeBSD Target template class LLVM_LIBRARY_VISIBILITY KFreeBSDTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // GNU/kFreeBSD defines; list based off of gcc output DefineStd(Builder, "unix", Opts); Builder.defineMacro("__FreeBSD_kernel__"); Builder.defineMacro("__GLIBC__"); if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); if (Opts.CPlusPlus) Builder.defineMacro("_GNU_SOURCE"); } public: using OSTargetInfo::OSTargetInfo; }; // Haiku Target template class LLVM_LIBRARY_VISIBILITY HaikuTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // Haiku defines; list based off of gcc output Builder.defineMacro("__HAIKU__"); DefineStd(Builder, "unix", Opts); if (this->HasFloat128) Builder.defineMacro("__FLOAT128__"); } public: HaikuTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->SizeType = TargetInfo::UnsignedLong; this->IntPtrType = TargetInfo::SignedLong; this->PtrDiffType = TargetInfo::SignedLong; this->ProcessIDType = TargetInfo::SignedLong; switch (Triple.getArch()) { default: break; case llvm::Triple::x86: case llvm::Triple::x86_64: this->HasFloat128 = true; break; } } }; // Hurd target template class LLVM_LIBRARY_VISIBILITY HurdTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // Hurd defines; list based off of gcc output. DefineStd(Builder, "unix", Opts); Builder.defineMacro("__GNU__"); Builder.defineMacro("__gnu_hurd__"); Builder.defineMacro("__MACH__"); Builder.defineMacro("__GLIBC__"); if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); if (Opts.CPlusPlus) Builder.defineMacro("_GNU_SOURCE"); } public: using OSTargetInfo::OSTargetInfo; }; // Linux target template class LLVM_LIBRARY_VISIBILITY LinuxTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // Linux defines; list based off of gcc output DefineStd(Builder, "unix", Opts); DefineStd(Builder, "linux", Opts); if (Triple.isAndroid()) { Builder.defineMacro("__ANDROID__", "1"); this->PlatformName = "android"; this->PlatformMinVersion = Triple.getEnvironmentVersion(); const unsigned Maj = this->PlatformMinVersion.getMajor(); if (Maj) { Builder.defineMacro("__ANDROID_MIN_SDK_VERSION__", Twine(Maj)); // This historical but ambiguous name for the minSdkVersion macro. Keep // defined for compatibility. 
Builder.defineMacro("__ANDROID_API__", "__ANDROID_MIN_SDK_VERSION__"); } } else { Builder.defineMacro("__gnu_linux__"); } if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); if (Opts.CPlusPlus) Builder.defineMacro("_GNU_SOURCE"); if (this->HasFloat128) Builder.defineMacro("__FLOAT128__"); + if (Triple.isTime64ABI()) { + Builder.defineMacro("_FILE_OFFSET_BITS", "64"); + Builder.defineMacro("_TIME_BITS", "64"); + } } public: LinuxTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->WIntType = TargetInfo::UnsignedInt; switch (Triple.getArch()) { default: break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::ppc: case llvm::Triple::ppcle: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: this->MCountName = "_mcount"; break; case llvm::Triple::x86: case llvm::Triple::x86_64: this->HasFloat128 = true; break; } } const char *getStaticInitSectionSpecifier() const override { return ".text.startup"; } }; // NetBSD Target template class LLVM_LIBRARY_VISIBILITY NetBSDTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // NetBSD defines; list based off of gcc output Builder.defineMacro("__NetBSD__"); Builder.defineMacro("__unix__"); if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); if (this->HasFloat128) Builder.defineMacro("__FLOAT128__"); } public: NetBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->MCountName = "__mcount"; switch (Triple.getArch()) { default: break; case llvm::Triple::x86: case llvm::Triple::x86_64: this->HasFloat128 = true; break; } } }; // OpenBSD Target template class LLVM_LIBRARY_VISIBILITY OpenBSDTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // OpenBSD defines; list based off of gcc output Builder.defineMacro("__OpenBSD__"); DefineStd(Builder, "unix", Opts); if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); if (this->HasFloat128) Builder.defineMacro("__FLOAT128__"); if (Opts.C11) Builder.defineMacro("__STDC_NO_THREADS__"); } public: OpenBSDTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->WCharType = this->WIntType = this->SignedInt; this->IntMaxType = TargetInfo::SignedLongLong; this->Int64Type = TargetInfo::SignedLongLong; switch (Triple.getArch()) { case llvm::Triple::x86: case llvm::Triple::x86_64: this->HasFloat128 = true; [[fallthrough]]; default: this->MCountName = "__mcount"; break; case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: case llvm::Triple::sparcv9: this->MCountName = "_mcount"; break; case llvm::Triple::riscv32: case llvm::Triple::riscv64: break; } } }; // PS3 PPU Target template class LLVM_LIBRARY_VISIBILITY PS3PPUTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // PS3 PPU defines. 
Builder.defineMacro("__PPU__"); Builder.defineMacro("__CELLOS_LV2__"); Builder.defineMacro("__LP32__"); Builder.defineMacro("_ARCH_PPC64"); Builder.defineMacro("__powerpc64__"); } public: PS3PPUTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->LongWidth = this->LongAlign = 32; this->PointerWidth = this->PointerAlign = 32; this->IntMaxType = TargetInfo::SignedLongLong; this->Int64Type = TargetInfo::SignedLongLong; this->SizeType = TargetInfo::UnsignedInt; this->resetDataLayout("E-m:e-p:32:32-Fi64-i64:64-n32:64"); } }; // Common base class for PS4/PS5 targets. template class LLVM_LIBRARY_VISIBILITY PSOSTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { Builder.defineMacro("__FreeBSD__", "9"); Builder.defineMacro("__FreeBSD_cc_version", "900001"); Builder.defineMacro("__KPRINTF_ATTRIBUTE__"); DefineStd(Builder, "unix", Opts); Builder.defineMacro("__SCE__"); Builder.defineMacro("__STDC_NO_COMPLEX__"); Builder.defineMacro("__STDC_NO_THREADS__"); } public: PSOSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->WCharType = TargetInfo::UnsignedShort; // On PS4/PS5, TLS variable cannot be aligned to more than 32 bytes (256 // bits). this->MaxTLSAlign = 256; // On PS4/PS5, do not honor explicit bit field alignment, // as in "__attribute__((aligned(2))) int b : 1;". this->UseExplicitBitFieldAlignment = false; this->MCountName = ".mcount"; this->NewAlign = 256; this->SuitableAlign = 256; } TargetInfo::CallingConvCheckResult checkCallingConvention(CallingConv CC) const override { return (CC == CC_C) ? TargetInfo::CCCR_OK : TargetInfo::CCCR_Error; } bool areDefaultedSMFStillPOD(const LangOptions &) const override { return false; } }; // PS4 Target template class LLVM_LIBRARY_VISIBILITY PS4OSTargetInfo : public PSOSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // Start with base class defines. PSOSTargetInfo::getOSDefines(Opts, Triple, Builder); Builder.defineMacro("__ORBIS__"); } public: using PSOSTargetInfo::PSOSTargetInfo; }; // PS5 Target template class LLVM_LIBRARY_VISIBILITY PS5OSTargetInfo : public PSOSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // Start with base class defines. 
PSOSTargetInfo::getOSDefines(Opts, Triple, Builder); Builder.defineMacro("__PROSPERO__"); } public: using PSOSTargetInfo::PSOSTargetInfo; }; // RTEMS Target template class LLVM_LIBRARY_VISIBILITY RTEMSTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // RTEMS defines; list based off of gcc output Builder.defineMacro("__rtems__"); if (Opts.CPlusPlus) Builder.defineMacro("_GNU_SOURCE"); } public: RTEMSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { switch (Triple.getArch()) { default: case llvm::Triple::x86: // this->MCountName = ".mcount"; break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::ppc: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: // this->MCountName = "_mcount"; break; case llvm::Triple::arm: // this->MCountName = "__mcount"; break; } } }; // Solaris target template class LLVM_LIBRARY_VISIBILITY SolarisTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { DefineStd(Builder, "sun", Opts); DefineStd(Builder, "unix", Opts); Builder.defineMacro("__svr4__"); Builder.defineMacro("__SVR4"); // Solaris headers require _XOPEN_SOURCE to be set to 600 for C99 and // newer, but to 500 for everything else. feature_test.h has a check to // ensure that you are not using C99 with an old version of X/Open or C89 // with a new version. if (Opts.C99) Builder.defineMacro("_XOPEN_SOURCE", "600"); else Builder.defineMacro("_XOPEN_SOURCE", "500"); if (Opts.CPlusPlus) { Builder.defineMacro("__C99FEATURES__"); Builder.defineMacro("_FILE_OFFSET_BITS", "64"); } // GCC restricts the next two to C++. Builder.defineMacro("_LARGEFILE_SOURCE"); Builder.defineMacro("_LARGEFILE64_SOURCE"); Builder.defineMacro("__EXTENSIONS__"); if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); if (this->HasFloat128) Builder.defineMacro("__FLOAT128__"); } public: SolarisTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { if (this->PointerWidth == 64) { this->WCharType = this->WIntType = this->SignedInt; } else { this->WCharType = this->WIntType = this->SignedLong; } switch (Triple.getArch()) { default: break; case llvm::Triple::x86: case llvm::Triple::x86_64: this->HasFloat128 = true; break; } } }; // AIX Target template class AIXTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { DefineStd(Builder, "unix", Opts); Builder.defineMacro("_IBMR2"); Builder.defineMacro("_POWER"); Builder.defineMacro("__THW_BIG_ENDIAN__"); Builder.defineMacro("_AIX"); Builder.defineMacro("__TOS_AIX__"); Builder.defineMacro("__HOS_AIX__"); if (Opts.C11) { Builder.defineMacro("__STDC_NO_ATOMICS__"); Builder.defineMacro("__STDC_NO_THREADS__"); } if (Opts.EnableAIXExtendedAltivecABI) Builder.defineMacro("__EXTABI__"); VersionTuple OsVersion = Triple.getOSVersion(); // Define AIX OS-Version Macros. // Includes logic for legacy versions of AIX; no specific intent to support. 
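// Worked example (illustrative): powerpc-ibm-aix7.2.0.0 yields OsVersion 7.2,
// so everything up to and including _AIX72 is defined below, but _AIX73 is
// not.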
if (OsVersion >= VersionTuple(3, 2)) Builder.defineMacro("_AIX32"); if (OsVersion >= VersionTuple(4, 1)) Builder.defineMacro("_AIX41"); if (OsVersion >= VersionTuple(4, 3)) Builder.defineMacro("_AIX43"); if (OsVersion >= VersionTuple(5, 0)) Builder.defineMacro("_AIX50"); if (OsVersion >= VersionTuple(5, 1)) Builder.defineMacro("_AIX51"); if (OsVersion >= VersionTuple(5, 2)) Builder.defineMacro("_AIX52"); if (OsVersion >= VersionTuple(5, 3)) Builder.defineMacro("_AIX53"); if (OsVersion >= VersionTuple(6, 1)) Builder.defineMacro("_AIX61"); if (OsVersion >= VersionTuple(7, 1)) Builder.defineMacro("_AIX71"); if (OsVersion >= VersionTuple(7, 2)) Builder.defineMacro("_AIX72"); if (OsVersion >= VersionTuple(7, 3)) Builder.defineMacro("_AIX73"); // FIXME: Do not define _LONG_LONG when -fno-long-long is specified. Builder.defineMacro("_LONG_LONG"); if (Opts.POSIXThreads) { Builder.defineMacro("_THREAD_SAFE"); } if (this->PointerWidth == 64) { Builder.defineMacro("__64BIT__"); } // Define _WCHAR_T when it is a fundamental type // (i.e., for C++ without -fno-wchar). if (Opts.CPlusPlus && Opts.WChar) { Builder.defineMacro("_WCHAR_T"); } } public: AIXTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->MCountName = "__mcount"; this->TheCXXABI.set(TargetCXXABI::XL); if (this->PointerWidth == 64) { this->WCharType = this->UnsignedInt; } else { this->WCharType = this->UnsignedShort; } this->UseZeroLengthBitfieldAlignment = true; } // AIX sets FLT_EVAL_METHOD to be 1. LangOptions::FPEvalMethodKind getFPEvalMethod() const override { return LangOptions::FPEvalMethodKind::FEM_Double; } bool defaultsToAIXPowerAlignment() const override { return true; } bool areDefaultedSMFStillPOD(const LangOptions &) const override { return false; } }; // z/OS target template class LLVM_LIBRARY_VISIBILITY ZOSTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // FIXME: _LONG_LONG should not be defined under -std=c89. Builder.defineMacro("_LONG_LONG"); Builder.defineMacro("__370__"); Builder.defineMacro("__BFP__"); // FIXME: __BOOL__ should not be defined under -std=c89. Builder.defineMacro("__BOOL__"); Builder.defineMacro("__COMPILER_VER__", "0x50000000"); Builder.defineMacro("__LONGNAME__"); Builder.defineMacro("__MVS__"); Builder.defineMacro("__THW_370__"); Builder.defineMacro("__THW_BIG_ENDIAN__"); Builder.defineMacro("__TOS_390__"); Builder.defineMacro("__TOS_MVS__"); Builder.defineMacro("__XPLINK__"); if (this->PointerWidth == 64) Builder.defineMacro("__64BIT__"); if (Opts.CPlusPlus && Opts.WChar) { // Macro __wchar_t is defined so that the wchar_t data // type is not declared as a typedef in system headers. 
Builder.defineMacro("__wchar_t"); } this->PlatformName = llvm::Triple::getOSTypeName(Triple.getOS()); } public: ZOSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->WCharType = TargetInfo::UnsignedInt; this->MaxAlignedAttribute = 128; this->UseBitFieldTypeAlignment = false; this->UseZeroLengthBitfieldAlignment = true; this->UseLeadingZeroLengthBitfield = false; this->ZeroLengthBitfieldBoundary = 32; this->TheCXXABI.set(TargetCXXABI::XL); } bool areDefaultedSMFStillPOD(const LangOptions &) const override { return false; } }; void addWindowsDefines(const llvm::Triple &Triple, const LangOptions &Opts, MacroBuilder &Builder); // Windows target template class LLVM_LIBRARY_VISIBILITY WindowsTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { addWindowsDefines(Triple, Opts, Builder); } public: WindowsTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->WCharType = TargetInfo::UnsignedShort; this->WIntType = TargetInfo::UnsignedShort; } }; template class LLVM_LIBRARY_VISIBILITY NaClTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); if (Opts.CPlusPlus) Builder.defineMacro("_GNU_SOURCE"); DefineStd(Builder, "unix", Opts); Builder.defineMacro("__native_client__"); } public: NaClTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->LongAlign = 32; this->LongWidth = 32; this->PointerAlign = 32; this->PointerWidth = 32; this->IntMaxType = TargetInfo::SignedLongLong; this->Int64Type = TargetInfo::SignedLongLong; this->DoubleAlign = 64; this->LongDoubleWidth = 64; this->LongDoubleAlign = 64; this->LongLongWidth = 64; this->LongLongAlign = 64; this->SizeType = TargetInfo::UnsignedInt; this->PtrDiffType = TargetInfo::SignedInt; this->IntPtrType = TargetInfo::SignedInt; // RegParmMax is inherited from the underlying architecture. this->LongDoubleFormat = &llvm::APFloat::IEEEdouble(); if (Triple.getArch() == llvm::Triple::arm) { // Handled in ARM's setABI(). } else if (Triple.getArch() == llvm::Triple::x86) { this->resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-" "i64:64-i128:128-n8:16:32-S128"); } else if (Triple.getArch() == llvm::Triple::x86_64) { this->resetDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-" "i64:64-i128:128-n8:16:32:64-S128"); } else if (Triple.getArch() == llvm::Triple::mipsel) { // Handled on mips' setDataLayout. } else { assert(Triple.getArch() == llvm::Triple::le32); this->resetDataLayout("e-p:32:32-i64:64"); } } }; // Fuchsia Target template class LLVM_LIBRARY_VISIBILITY FuchsiaTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { Builder.defineMacro("__Fuchsia__"); if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); // Required by the libc++ locale support. 
if (Opts.CPlusPlus) Builder.defineMacro("_GNU_SOURCE"); Builder.defineMacro("__Fuchsia_API_level__", Twine(Opts.FuchsiaAPILevel)); this->PlatformName = "fuchsia"; this->PlatformMinVersion = VersionTuple(Opts.FuchsiaAPILevel); } public: FuchsiaTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->WIntType = TargetInfo::UnsignedInt; this->MCountName = "__mcount"; this->TheCXXABI.set(TargetCXXABI::Fuchsia); } }; // WebAssembly target template class LLVM_LIBRARY_VISIBILITY WebAssemblyOSTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // A common platform macro. if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); // Follow g++ convention and predefine _GNU_SOURCE for C++. if (Opts.CPlusPlus) Builder.defineMacro("_GNU_SOURCE"); // Indicate that we have __float128. Builder.defineMacro("__FLOAT128__"); } public: explicit WebAssemblyOSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->MCountName = "__mcount"; this->TheCXXABI.set(TargetCXXABI::WebAssembly); this->HasFloat128 = true; } }; // WASI target template class LLVM_LIBRARY_VISIBILITY WASITargetInfo : public WebAssemblyOSTargetInfo { void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const final { WebAssemblyOSTargetInfo::getOSDefines(Opts, Triple, Builder); Builder.defineMacro("__wasi__"); } public: using WebAssemblyOSTargetInfo::WebAssemblyOSTargetInfo; }; // Emscripten target template class LLVM_LIBRARY_VISIBILITY EmscriptenTargetInfo : public WebAssemblyOSTargetInfo { void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const final { WebAssemblyOSTargetInfo::getOSDefines(Opts, Triple, Builder); DefineStd(Builder, "unix", Opts); Builder.defineMacro("__EMSCRIPTEN__"); if (Opts.POSIXThreads) Builder.defineMacro("__EMSCRIPTEN_PTHREADS__"); } public: explicit EmscriptenTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : WebAssemblyOSTargetInfo(Triple, Opts) { // Keeping the alignment of long double to 8 bytes even though its size is // 16 bytes allows emscripten to have an 8-byte-aligned max_align_t which // in turn gives is a 8-byte aligned malloc. // Emscripten's ABI is unstable and we may change this back to 128 to match // the WebAssembly default in the future. 
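// Net effect (illustrative): sizeof(long double) remains 16 on this target
// while alignof(long double), and hence alignof(max_align_t), is 8.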
this->LongDoubleAlign = 64; } }; // OHOS target template class LLVM_LIBRARY_VISIBILITY OHOSTargetInfo : public OSTargetInfo { protected: void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple, MacroBuilder &Builder) const override { // Linux defines; list based off of gcc output DefineStd(Builder, "unix", Opts); // Generic OHOS target defines if (Triple.isOHOSFamily()) { Builder.defineMacro("__OHOS_FAMILY__", "1"); auto Version = Triple.getEnvironmentVersion(); this->PlatformName = "ohos"; this->PlatformMinVersion = Version; Builder.defineMacro("__OHOS_Major__", Twine(Version.getMajor())); if (auto Minor = Version.getMinor()) Builder.defineMacro("__OHOS_Minor__", Twine(*Minor)); if (auto Subminor = Version.getSubminor()) Builder.defineMacro("__OHOS_Micro__", Twine(*Subminor)); } if (Triple.isOpenHOS()) Builder.defineMacro("__OHOS__"); if (Triple.isOSLinux()) { DefineStd(Builder, "linux", Opts); } else if (Triple.isOSLiteOS()) { Builder.defineMacro("__LITEOS__"); } if (Opts.POSIXThreads) Builder.defineMacro("_REENTRANT"); if (Opts.CPlusPlus) Builder.defineMacro("_GNU_SOURCE"); if (this->HasFloat128) Builder.defineMacro("__FLOAT128__"); } public: OHOSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { this->WIntType = TargetInfo::UnsignedInt; switch (Triple.getArch()) { default: break; case llvm::Triple::x86: case llvm::Triple::x86_64: this->HasFloat128 = true; break; } } const char *getStaticInitSectionSpecifier() const override { return ".text.startup"; } }; } // namespace targets } // namespace clang #endif // LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp index 5639239359ab..86d47054615e 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGBuiltin.cpp @@ -1,22083 +1,22078 @@ //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This contains code to emit Builtin calls as LLVM code. 
// //===----------------------------------------------------------------------===// #include "ABIInfo.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" #include "CGHLSLRuntime.h" #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" #include "PatternInit.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/OSLog.h" #include "clang/AST/OperationKinds.h" #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TargetOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/FloatingPointMode.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsBPF.h" #include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/IntrinsicsHexagon.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/IntrinsicsR600.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/IntrinsicsS390.h" #include "llvm/IR/IntrinsicsVE.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" #include "llvm/IR/MemoryModelRelaxationAnnotations.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/TargetParser/AArch64TargetParser.h" #include "llvm/TargetParser/X86TargetParser.h" #include #include using namespace clang; using namespace CodeGen; using namespace llvm; static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, Align AlignmentInBytes) { ConstantInt *Byte; switch (CGF.getLangOpts().getTrivialAutoVarInit()) { case LangOptions::TrivialAutoVarInitKind::Uninitialized: // Nothing to initialize. return; case LangOptions::TrivialAutoVarInitKind::Zero: Byte = CGF.Builder.getInt8(0x00); break; case LangOptions::TrivialAutoVarInitKind::Pattern: { llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext()); Byte = llvm::dyn_cast( initializationPatternFor(CGF.CGM, Int8)); break; } } if (CGF.CGM.stopAutoInit()) return; auto *I = CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes); I->addAnnotationMetadata("auto-init"); } /// getBuiltinLibFunction - Given a builtin id for a function like /// "__builtin_fabsf", return a Function* for "fabsf". llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID) { assert(Context.BuiltinInfo.isLibFunction(BuiltinID)); // Get the name, skip over the __builtin_ prefix (if necessary). StringRef Name; GlobalDecl D(FD); // TODO: This list should be expanded or refactored after all GCC-compatible // std libcall builtins are implemented. 
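// For example (illustrative): on powerpc64le with IEEE 128-bit long double
// (-mabi=ieeelongdouble), a __builtin_printf call is redirected through the
// table below to glibc's __printfieee128 entry point instead of plain printf.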
static SmallDenseMap F128Builtins{ {Builtin::BI__builtin___fprintf_chk, "__fprintf_chkieee128"}, {Builtin::BI__builtin___printf_chk, "__printf_chkieee128"}, {Builtin::BI__builtin___snprintf_chk, "__snprintf_chkieee128"}, {Builtin::BI__builtin___sprintf_chk, "__sprintf_chkieee128"}, {Builtin::BI__builtin___vfprintf_chk, "__vfprintf_chkieee128"}, {Builtin::BI__builtin___vprintf_chk, "__vprintf_chkieee128"}, {Builtin::BI__builtin___vsnprintf_chk, "__vsnprintf_chkieee128"}, {Builtin::BI__builtin___vsprintf_chk, "__vsprintf_chkieee128"}, {Builtin::BI__builtin_fprintf, "__fprintfieee128"}, {Builtin::BI__builtin_printf, "__printfieee128"}, {Builtin::BI__builtin_snprintf, "__snprintfieee128"}, {Builtin::BI__builtin_sprintf, "__sprintfieee128"}, {Builtin::BI__builtin_vfprintf, "__vfprintfieee128"}, {Builtin::BI__builtin_vprintf, "__vprintfieee128"}, {Builtin::BI__builtin_vsnprintf, "__vsnprintfieee128"}, {Builtin::BI__builtin_vsprintf, "__vsprintfieee128"}, {Builtin::BI__builtin_fscanf, "__fscanfieee128"}, {Builtin::BI__builtin_scanf, "__scanfieee128"}, {Builtin::BI__builtin_sscanf, "__sscanfieee128"}, {Builtin::BI__builtin_vfscanf, "__vfscanfieee128"}, {Builtin::BI__builtin_vscanf, "__vscanfieee128"}, {Builtin::BI__builtin_vsscanf, "__vsscanfieee128"}, {Builtin::BI__builtin_nexttowardf128, "__nexttowardieee128"}, }; // The AIX library functions frexpl, ldexpl, and modfl are for 128-bit // IBM 'long double' (i.e. __ibm128). Map to the 'double' versions // if it is 64-bit 'long double' mode. static SmallDenseMap AIXLongDouble64Builtins{ {Builtin::BI__builtin_frexpl, "frexp"}, {Builtin::BI__builtin_ldexpl, "ldexp"}, {Builtin::BI__builtin_modfl, "modf"}, }; // If the builtin has been declared explicitly with an assembler label, // use the mangled name. This differs from the plain label on platforms // that prefix labels. if (FD->hasAttr()) Name = getMangledName(D); else { // TODO: This mutation should also be applied to other targets other than // PPC, after backend supports IEEE 128-bit style libcalls. if (getTriple().isPPC64() && &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad() && F128Builtins.contains(BuiltinID)) Name = F128Builtins[BuiltinID]; else if (getTriple().isOSAIX() && &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEdouble() && AIXLongDouble64Builtins.contains(BuiltinID)) Name = AIXLongDouble64Builtins[BuiltinID]; else Name = Context.BuiltinInfo.getName(BuiltinID).substr(10); } llvm::FunctionType *Ty = cast(getTypes().ConvertType(FD->getType())); return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false); } /// Emit the conversions required to turn the given value into an /// integer of the given size. static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::IntegerType *IntType) { V = CGF.EmitToMemory(V, T); if (V->getType()->isPointerTy()) return CGF.Builder.CreatePtrToInt(V, IntType); assert(V->getType() == IntType); return V; } static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType) { V = CGF.EmitFromMemory(V, T); if (ResultType->isPointerTy()) return CGF.Builder.CreateIntToPtr(V, ResultType); assert(V->getType() == ResultType); return V; } static Address CheckAtomicAlignment(CodeGenFunction &CGF, const CallExpr *E) { ASTContext &Ctx = CGF.getContext(); Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0)); unsigned Bytes = Ptr.getElementType()->isPointerTy() ? 
Ctx.getTypeSizeInChars(Ctx.VoidPtrTy).getQuantity() : Ptr.getElementType()->getScalarSizeInBits() / 8; unsigned Align = Ptr.getAlignment().getQuantity(); if (Align % Bytes != 0) { DiagnosticsEngine &Diags = CGF.CGM.getDiags(); Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned); // Force address to be at least naturally-aligned. return Ptr.withAlignment(CharUnits::fromQuantity(Bytes)); } return Ptr; } /// Utility to insert an atomic instruction based on Intrinsic::ID /// and the expression node. static Value *MakeBinaryAtomicValue( CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { QualType T = E->getType(); assert(E->getArg(0)->getType()->isPointerType()); assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(0)->getType()->getPointeeType())); assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); Address DestAddr = CheckAtomicAlignment(CGF, E); llvm::IntegerType *IntType = llvm::IntegerType::get( CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1)); llvm::Type *ValueType = Val->getType(); Val = EmitToInt(CGF, Val, T, IntType); llvm::Value *Result = CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering); return EmitFromInt(CGF, Result, T, ValueType); } static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { Value *Val = CGF.EmitScalarExpr(E->getArg(0)); Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1)); Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType()); LV.setNontemporal(true); CGF.EmitStoreOfScalar(Val, LV, false); return nullptr; } static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0)); LValue LV = CGF.MakeAddrLValue(Addr, E->getType()); LV.setNontemporal(true); return CGF.EmitLoadOfScalar(LV, E->getExprLoc()); } static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E) { return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E)); } /// Utility to insert an atomic instruction based Intrinsic::ID and /// the expression node, where the return value is the result of the /// operation. static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert = false) { QualType T = E->getType(); assert(E->getArg(0)->getType()->isPointerType()); assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(0)->getType()->getPointeeType())); assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType())); Address DestAddr = CheckAtomicAlignment(CGF, E); llvm::IntegerType *IntType = llvm::IntegerType::get( CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1)); llvm::Type *ValueType = Val->getType(); Val = EmitToInt(CGF, Val, T, IntType); llvm::Value *Result = CGF.Builder.CreateAtomicRMW( Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent); Result = CGF.Builder.CreateBinOp(Op, Result, Val); if (Invert) Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result, llvm::ConstantInt::getAllOnesValue(IntType)); Result = EmitFromInt(CGF, Result, T, ValueType); return RValue::get(Result); } /// Utility to insert an atomic cmpxchg instruction. /// /// @param CGF The current codegen function. 
/// @param E Builtin call expression to convert to cmpxchg. /// arg0 - address to operate on /// arg1 - value to compare with /// arg2 - new value /// @param ReturnBool Specifies whether to return success flag of /// cmpxchg result or the old value. /// /// @returns result of cmpxchg, according to ReturnBool /// /// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics /// invoke the function EmitAtomicCmpXchgForMSIntrin. static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool) { QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType(); Address DestAddr = CheckAtomicAlignment(CGF, E); llvm::IntegerType *IntType = llvm::IntegerType::get( CGF.getLLVMContext(), CGF.getContext().getTypeSize(T)); Value *Cmp = CGF.EmitScalarExpr(E->getArg(1)); llvm::Type *ValueType = Cmp->getType(); Cmp = EmitToInt(CGF, Cmp, T, IntType); Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); Value *Pair = CGF.Builder.CreateAtomicCmpXchg( DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent, llvm::AtomicOrdering::SequentiallyConsistent); if (ReturnBool) // Extract boolean success flag and zext it to int. return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1), CGF.ConvertType(E->getType())); else // Extract old value and emit it using the same type as compare value. return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T, ValueType); } /// This function should be invoked to emit atomic cmpxchg for Microsoft's /// _InterlockedCompareExchange* intrinsics which have the following signature: /// T _InterlockedCompareExchange(T volatile *Destination, /// T Exchange, /// T Comparand); /// /// Whereas the llvm 'cmpxchg' instruction has the following syntax: /// cmpxchg *Destination, Comparand, Exchange. /// So we need to swap Comparand and Exchange when invoking /// CreateAtomicCmpXchg. That is the reason we could not use the above utility /// function MakeAtomicCmpXchgValue since it expects the arguments to be /// already swapped. static Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) { assert(E->getArg(0)->getType()->isPointerType()); assert(CGF.getContext().hasSameUnqualifiedType( E->getType(), E->getArg(0)->getType()->getPointeeType())); assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), E->getArg(1)->getType())); assert(CGF.getContext().hasSameUnqualifiedType(E->getType(), E->getArg(2)->getType())); Address DestAddr = CheckAtomicAlignment(CGF, E); auto *Comparand = CGF.EmitScalarExpr(E->getArg(2)); auto *Exchange = CGF.EmitScalarExpr(E->getArg(1)); // For Release ordering, the failure ordering should be Monotonic. auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ? AtomicOrdering::Monotonic : SuccessOrdering; // The atomic instruction is marked volatile for consistency with MSVC. This // blocks the few atomics optimizations that LLVM has. If we want to optimize // _Interlocked* operations in the future, we will have to remove the volatile // marker. auto *Result = CGF.Builder.CreateAtomicCmpXchg( DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering); Result->setVolatile(true); return CGF.Builder.CreateExtractValue(Result, 0); } // 64-bit Microsoft platforms support 128 bit cmpxchg operations. 
They are // prototyped like this: // // unsigned char _InterlockedCompareExchange128...( // __int64 volatile * _Destination, // __int64 _ExchangeHigh, // __int64 _ExchangeLow, // __int64 * _ComparandResult); // // Note that Destination is assumed to be at least 16-byte aligned, despite // being typed int64. static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering) { assert(E->getNumArgs() == 4); llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2)); Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3)); assert(DestPtr->getType()->isPointerTy()); assert(!ExchangeHigh->getType()->isPointerTy()); assert(!ExchangeLow->getType()->isPointerTy()); // For Release ordering, the failure ordering should be Monotonic. auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ? AtomicOrdering::Monotonic : SuccessOrdering; // Convert to i128 pointers and values. Alignment is also overridden for // destination pointer. llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128); Address DestAddr(DestPtr, Int128Ty, CGF.getContext().toCharUnitsFromBits(128)); ComparandAddr = ComparandAddr.withElementType(Int128Ty); // (((i128)hi) << 64) | ((i128)lo) ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty); ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty); ExchangeHigh = CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64)); llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow); // Load the comparand for the instruction. llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr); auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering); // The atomic instruction is marked volatile for consistency with MSVC. This // blocks the few atomics optimizations that LLVM has. If we want to optimize // _Interlocked* operations in the future, we will have to remove the volatile // marker. CXI->setVolatile(true); // Store the result as an outparameter. CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0), ComparandAddr); // Get the success boolean and zero extend it to i8. Value *Success = CGF.Builder.CreateExtractValue(CXI, 1); return CGF.Builder.CreateZExt(Success, CGF.Int8Ty); } static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { assert(E->getArg(0)->getType()->isPointerType()); auto *IntTy = CGF.ConvertType(E->getType()); Address DestAddr = CheckAtomicAlignment(CGF, E); auto *Result = CGF.Builder.CreateAtomicRMW( AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering); return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1)); } static Value *EmitAtomicDecrementValue( CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { assert(E->getArg(0)->getType()->isPointerType()); auto *IntTy = CGF.ConvertType(E->getType()); Address DestAddr = CheckAtomicAlignment(CGF, E); auto *Result = CGF.Builder.CreateAtomicRMW( AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering); return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1)); } // Build a plain volatile load. 
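// (Illustrative note: these load/store helpers back MSVC's
// __iso_volatile_load{8,16,32,64} and __iso_volatile_store{8,16,32,64}
// builtins; e.g. __iso_volatile_load32(p) becomes a volatile i32 load of *p.)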
static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) { Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); QualType ElTy = E->getArg(0)->getType()->getPointeeType(); CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy); llvm::Type *ITy = llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8); llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(ITy, Ptr, LoadSize); Load->setVolatile(true); return Load; } // Build a plain volatile store. static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); Value *Value = CGF.EmitScalarExpr(E->getArg(1)); QualType ElTy = E->getArg(0)->getType()->getPointeeType(); CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy); llvm::StoreInst *Store = CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize); Store->setVolatile(true); return Store; } // Emit a simple mangled intrinsic that has 1 argument and a return type // matching the argument type. Depending on mode, this may be a constrained // floating-point intrinsic. static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0 }); } else { Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, Src0); } } // Emit an intrinsic that has 2 operands of the same type as its result. // Depending on mode, this may be a constrained floating-point intrinsic. static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); } else { Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, { Src0, Src1 }); } } // Has second type mangled argument. static Value *emitBinaryExpMaybeConstrainedFPBuiltin( CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID, llvm::Intrinsic::ID ConstrainedIntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, {Src0->getType(), Src1->getType()}); return CGF.Builder.CreateConstrainedFPCall(F, {Src0, Src1}); } Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), Src1->getType()}); return CGF.Builder.CreateCall(F, {Src0, Src1}); } // Emit an intrinsic that has 3 operands of the same type as its result. // Depending on mode, this may be a constrained floating-point intrinsic. 
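// For example (illustrative): __builtin_fma(x, y, z) on doubles becomes
// llvm.fma.f64 by default, or llvm.experimental.constrained.fma.f64 when
// strict floating point is in effect (e.g. under #pragma STDC FENV_ACCESS ON).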
static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); if (CGF.Builder.getIsFPConstrained()) { Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); } else { Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); } } // Emit an intrinsic where all operands are of the same type as the result. // Depending on mode, this may be a constrained floating-point intrinsic. static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID, llvm::Type *Ty, ArrayRef Args) { Function *F; if (CGF.Builder.getIsFPConstrained()) F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Ty); else F = CGF.CGM.getIntrinsic(IntrinsicID, Ty); if (CGF.Builder.getIsFPConstrained()) return CGF.Builder.CreateConstrainedFPCall(F, Args); else return CGF.Builder.CreateCall(F, Args); } // Emit a simple intrinsic that has N scalar arguments and a return type // matching the argument type. It is assumed that only the first argument is // overloaded. template Value *emitBuiltinWithOneOverloadedType(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name = "") { static_assert(N, "expect non-empty argument"); SmallVector Args; for (unsigned I = 0; I < N; ++I) Args.push_back(CGF.EmitScalarExpr(E->getArg(I))); Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Args[0]->getType()); return CGF.Builder.CreateCall(F, Args, Name); } // Emit an intrinsic that has 1 float or double operand, and 1 integer. static Value *emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); return CGF.Builder.CreateCall(F, {Src0, Src1}); } // Emit an intrinsic that has overloaded integer result and fp operand. 
static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID) { llvm::Type *ResultType = CGF.ConvertType(E->getType()); llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); if (CGF.Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, {ResultType, Src0->getType()}); return CGF.Builder.CreateConstrainedFPCall(F, {Src0}); } else { Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()}); return CGF.Builder.CreateCall(F, Src0); } } static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Intrinsic::ID IntrinsicID) { llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); QualType IntPtrTy = E->getArg(1)->getType()->getPointeeType(); llvm::Type *IntTy = CGF.ConvertType(IntPtrTy); llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {Src0->getType(), IntTy}); llvm::Value *Call = CGF.Builder.CreateCall(F, Src0); llvm::Value *Exp = CGF.Builder.CreateExtractValue(Call, 1); LValue LV = CGF.MakeNaturalAlignAddrLValue(Src1, IntPtrTy); CGF.EmitStoreOfScalar(Exp, LV); return CGF.Builder.CreateExtractValue(Call, 0); } /// EmitFAbs - Emit a call to @llvm.fabs(). static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); Call->setDoesNotAccessMemory(); return Call; } /// Emit the computation of the sign bit for a floating point value. Returns /// the i1 sign bit value. static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) { LLVMContext &C = CGF.CGM.getLLVMContext(); llvm::Type *Ty = V->getType(); int Width = Ty->getPrimitiveSizeInBits(); llvm::Type *IntTy = llvm::IntegerType::get(C, Width); V = CGF.Builder.CreateBitCast(V, IntTy); if (Ty->isPPC_FP128Ty()) { // We want the sign bit of the higher-order double. The bitcast we just // did works as if the double-double was stored to memory and then // read as an i128. The "store" will put the higher-order double in the // lower address in both little- and big-Endian modes, but the "load" // will treat those bits as a different part of the i128: the low bits in // little-Endian, the high bits in big-Endian. Therefore, on big-Endian // we need to shift the high bits down to the low before truncating. Width >>= 1; if (CGF.getTarget().isBigEndian()) { Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width); V = CGF.Builder.CreateLShr(V, ShiftCst); } // We are truncating value in order to extract the higher-order // double, which we will be using to extract the sign from. IntTy = llvm::IntegerType::get(C, Width); V = CGF.Builder.CreateTrunc(V, IntTy); } Value *Zero = llvm::Constant::getNullValue(IntTy); return CGF.Builder.CreateICmpSLT(V, Zero); } static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD)); RValue Call = CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot()); // Check the supported intrinsic. 
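// At the moment only expf and its __builtin_ variants are recognized below.
// With -fmath-errno (and outside FP-constrained mode) such a libcall writes at
// most errno, so the emitted call instruction is decorated with "int" TBAA to
// make that visible to the optimizer.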
if (unsigned BuiltinID = FD->getBuiltinID()) { auto IsErrnoIntrinsic = [&]() -> unsigned { switch (BuiltinID) { case Builtin::BIexpf: case Builtin::BI__builtin_expf: case Builtin::BI__builtin_expf128: return true; } // TODO: support more FP math libcalls return false; }(); // Restrict to target with errno, for example, MacOS doesn't set errno. if (IsErrnoIntrinsic && CGF.CGM.getLangOpts().MathErrno && !CGF.Builder.getIsFPConstrained()) { ASTContext &Context = CGF.getContext(); // Emit "int" TBAA metadata on FP math libcalls. clang::QualType IntTy = Context.IntTy; TBAAAccessInfo TBAAInfo = CGF.CGM.getTBAAAccessInfo(IntTy); Instruction *Inst = cast(Call.getScalarVal()); CGF.CGM.DecorateInstructionWithTBAA(Inst, TBAAInfo); } } return Call; } /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.* /// depending on IntrinsicID. /// /// \arg CGF The current codegen function. /// \arg IntrinsicID The ID for the Intrinsic we wish to generate. /// \arg X The first argument to the llvm.*.with.overflow.*. /// \arg Y The second argument to the llvm.*.with.overflow.*. /// \arg Carry The carry returned by the llvm.*.with.overflow.*. /// \returns The result (i.e. sum/product) returned by the intrinsic. static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry) { // Make sure we have integers of the same width. assert(X->getType() == Y->getType() && "Arguments must be the same type. (Did you forget to make sure both " "arguments have the same integer width?)"); Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType()); llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y}); Carry = CGF.Builder.CreateExtractValue(Tmp, 1); return CGF.Builder.CreateExtractValue(Tmp, 0); } static Value *emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high) { Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {}); llvm::CallInst *Call = CGF.Builder.CreateCall(F); llvm::ConstantRange CR(APInt(32, low), APInt(32, high)); Call->addRangeRetAttr(CR); Call->addRetAttr(llvm::Attribute::AttrKind::NoUndef); return Call; } namespace { struct WidthAndSignedness { unsigned Width; bool Signed; }; } static WidthAndSignedness getIntegerWidthAndSignedness(const clang::ASTContext &context, const clang::QualType Type) { assert(Type->isIntegerType() && "Given type is not an integer."); unsigned Width = Type->isBooleanType() ? 1 : Type->isBitIntType() ? context.getIntWidth(Type) : context.getTypeInfo(Type).Width; bool Signed = Type->isSignedIntegerType(); return {Width, Signed}; } // Given one or more integer types, this function produces an integer type that // encompasses them: any value in one of the given types could be expressed in // the encompassing type. static struct WidthAndSignedness EncompassingIntegerType(ArrayRef Types) { assert(Types.size() > 0 && "Empty list of types."); // If any of the given types is signed, we must return a signed type. bool Signed = false; for (const auto &Type : Types) { Signed |= Type.Signed; } // The encompassing type must have a width greater than or equal to the width // of the specified types. Additionally, if the encompassing type is signed, // its width must be strictly greater than the width of any unsigned types // given. 
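// For example, given {unsigned 32-bit, signed 32-bit} the encompassing type is
// a signed 33-bit type: the result must be signed, the signed operand needs 32
// bits, and the unsigned operand needs 32 + 1 = 33 bits once a sign bit is
// required.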
unsigned Width = 0; for (const auto &Type : Types) { unsigned MinWidth = Type.Width + (Signed && !Type.Signed); if (Width < MinWidth) { Width = MinWidth; } } return {Width, Signed}; } Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) { Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend; return Builder.CreateCall(CGM.getIntrinsic(inst, {ArgValue->getType()}), ArgValue); } /// Checks if using the result of __builtin_object_size(p, @p From) in place of /// __builtin_object_size(p, @p To) is correct static bool areBOSTypesCompatible(int From, int To) { // Note: Our __builtin_object_size implementation currently treats Type=0 and // Type=2 identically. Encoding this implementation detail here may make // improving __builtin_object_size difficult in the future, so it's omitted. return From == To || (From == 0 && To == 1) || (From == 3 && To == 2); } static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) { return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true); } llvm::Value * CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, llvm::Value *EmittedE, bool IsDynamic) { uint64_t ObjectSize; if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type)) return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic); return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true); } const FieldDecl *CodeGenFunction::FindFlexibleArrayMemberFieldAndOffset( ASTContext &Ctx, const RecordDecl *RD, const FieldDecl *FAMDecl, uint64_t &Offset) { const LangOptions::StrictFlexArraysLevelKind StrictFlexArraysLevel = getLangOpts().getStrictFlexArraysLevel(); uint32_t FieldNo = 0; if (RD->isImplicit()) return nullptr; for (const FieldDecl *FD : RD->fields()) { if ((!FAMDecl || FD == FAMDecl) && Decl::isFlexibleArrayMemberLike( Ctx, FD, FD->getType(), StrictFlexArraysLevel, /*IgnoreTemplateOrMacroSubstitution=*/true)) { const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD); Offset += Layout.getFieldOffset(FieldNo); return FD; } QualType Ty = FD->getType(); if (Ty->isRecordType()) { if (const FieldDecl *Field = FindFlexibleArrayMemberFieldAndOffset( Ctx, Ty->getAsRecordDecl(), FAMDecl, Offset)) { const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(RD); Offset += Layout.getFieldOffset(FieldNo); return Field; } } if (!RD->isUnion()) ++FieldNo; } return nullptr; } static unsigned CountCountedByAttrs(const RecordDecl *RD) { unsigned Num = 0; for (const FieldDecl *FD : RD->fields()) { if (FD->getType()->isCountAttributedType()) return ++Num; QualType Ty = FD->getType(); if (Ty->isRecordType()) Num += CountCountedByAttrs(Ty->getAsRecordDecl()); } return Num; } llvm::Value * CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType) { // The code generated here calculates the size of a struct with a flexible // array member that uses the counted_by attribute. 
There are two instances // we handle: // // struct s { // unsigned long flags; // int count; // int array[] __attribute__((counted_by(count))); // } // // 1) bdos of the flexible array itself: // // __builtin_dynamic_object_size(p->array, 1) == // p->count * sizeof(*p->array) // // 2) bdos of a pointer into the flexible array: // // __builtin_dynamic_object_size(&p->array[42], 1) == // (p->count - 42) * sizeof(*p->array) // // 2) bdos of the whole struct, including the flexible array: // // __builtin_dynamic_object_size(p, 1) == // max(sizeof(struct s), // offsetof(struct s, array) + p->count * sizeof(*p->array)) // ASTContext &Ctx = getContext(); const Expr *Base = E->IgnoreParenImpCasts(); const Expr *Idx = nullptr; if (const auto *UO = dyn_cast(Base); UO && UO->getOpcode() == UO_AddrOf) { Expr *SubExpr = UO->getSubExpr()->IgnoreParenImpCasts(); if (const auto *ASE = dyn_cast(SubExpr)) { Base = ASE->getBase()->IgnoreParenImpCasts(); Idx = ASE->getIdx()->IgnoreParenImpCasts(); if (const auto *IL = dyn_cast(Idx)) { int64_t Val = IL->getValue().getSExtValue(); if (Val < 0) return getDefaultBuiltinObjectSizeResult(Type, ResType); if (Val == 0) // The index is 0, so we don't need to take it into account. Idx = nullptr; } } else { // Potential pointer to another element in the struct. Base = SubExpr; } } // Get the flexible array member Decl. const RecordDecl *OuterRD = nullptr; const FieldDecl *FAMDecl = nullptr; if (const auto *ME = dyn_cast(Base)) { // Check if \p Base is referencing the FAM itself. const ValueDecl *VD = ME->getMemberDecl(); OuterRD = VD->getDeclContext()->getOuterLexicalRecordContext(); FAMDecl = dyn_cast(VD); if (!FAMDecl) return nullptr; } else if (const auto *DRE = dyn_cast(Base)) { // Check if we're pointing to the whole struct. QualType Ty = DRE->getDecl()->getType(); if (Ty->isPointerType()) Ty = Ty->getPointeeType(); OuterRD = Ty->getAsRecordDecl(); // If we have a situation like this: // // struct union_of_fams { // int flags; // union { // signed char normal_field; // struct { // int count1; // int arr1[] __counted_by(count1); // }; // struct { // signed char count2; // int arr2[] __counted_by(count2); // }; // }; // }; // // We don't know which 'count' to use in this scenario: // // size_t get_size(struct union_of_fams *p) { // return __builtin_dynamic_object_size(p, 1); // } // // Instead of calculating a wrong number, we give up. if (OuterRD && CountCountedByAttrs(OuterRD) > 1) return nullptr; } if (!OuterRD) return nullptr; // We call FindFlexibleArrayMemberAndOffset even if FAMDecl is non-null to // get its offset. uint64_t Offset = 0; FAMDecl = FindFlexibleArrayMemberFieldAndOffset(Ctx, OuterRD, FAMDecl, Offset); Offset = Ctx.toCharUnitsFromBits(Offset).getQuantity(); if (!FAMDecl || !FAMDecl->getType()->isCountAttributedType()) // No flexible array member found or it doesn't have the "counted_by" // attribute. return nullptr; const FieldDecl *CountedByFD = FindCountedByField(FAMDecl); if (!CountedByFD) // Can't find the field referenced by the "counted_by" attribute. return nullptr; + if (isa(Base)) + // The whole struct is specificed in the __bdos. The calculation of the + // whole size of the structure can be done in two ways: + // + // 1) sizeof(struct S) + count * sizeof(typeof(fam)) + // 2) offsetof(struct S, fam) + count * sizeof(typeof(fam)) + // + // The first will add additional padding after the end of the array, + // allocation while the second method is more precise, but not quite + // expected from programmers. 
See + // https://lore.kernel.org/lkml/ZvV6X5FPBBW7CO1f@archlinux/ for a + // discussion of the topic. + // + // GCC isn't (currently) able to calculate __bdos on a pointer to the whole + // structure. Therefore, because of the above issue, we'll choose to match + // what GCC does for consistency's sake. + return nullptr; + // Build a load of the counted_by field. bool IsSigned = CountedByFD->getType()->isSignedIntegerType(); Value *CountedByInst = EmitCountedByFieldExpr(Base, FAMDecl, CountedByFD); if (!CountedByInst) return getDefaultBuiltinObjectSizeResult(Type, ResType); CountedByInst = Builder.CreateIntCast(CountedByInst, ResType, IsSigned); // Build a load of the index and subtract it from the count. Value *IdxInst = nullptr; if (Idx) { if (Idx->HasSideEffects(getContext())) // We can't have side-effects. return getDefaultBuiltinObjectSizeResult(Type, ResType); bool IdxSigned = Idx->getType()->isSignedIntegerType(); IdxInst = EmitAnyExprToTemp(Idx).getScalarVal(); IdxInst = Builder.CreateIntCast(IdxInst, ResType, IdxSigned); // We go ahead with the calculation here. If the index turns out to be // negative, we'll catch it at the end. CountedByInst = Builder.CreateSub(CountedByInst, IdxInst, "", !IsSigned, IsSigned); } // Calculate how large the flexible array member is in bytes. const ArrayType *ArrayTy = Ctx.getAsArrayType(FAMDecl->getType()); CharUnits Size = Ctx.getTypeSizeInChars(ArrayTy->getElementType()); llvm::Constant *ElemSize = llvm::ConstantInt::get(ResType, Size.getQuantity(), IsSigned); - Value *FAMSize = + Value *Res = Builder.CreateMul(CountedByInst, ElemSize, "", !IsSigned, IsSigned); - FAMSize = Builder.CreateIntCast(FAMSize, ResType, IsSigned); - Value *Res = FAMSize; - - if (isa(Base)) { - // The whole struct is specificed in the __bdos. - const ASTRecordLayout &Layout = Ctx.getASTRecordLayout(OuterRD); - - // Get the offset of the FAM. - llvm::Constant *FAMOffset = ConstantInt::get(ResType, Offset, IsSigned); - Value *OffsetAndFAMSize = - Builder.CreateAdd(FAMOffset, Res, "", !IsSigned, IsSigned); - - // Get the full size of the struct. - llvm::Constant *SizeofStruct = - ConstantInt::get(ResType, Layout.getSize().getQuantity(), IsSigned); - - // max(sizeof(struct s), - // offsetof(struct s, array) + p->count * sizeof(*p->array)) - Res = IsSigned - ? Builder.CreateBinaryIntrinsic(llvm::Intrinsic::smax, - OffsetAndFAMSize, SizeofStruct) - : Builder.CreateBinaryIntrinsic(llvm::Intrinsic::umax, - OffsetAndFAMSize, SizeofStruct); - } + Res = Builder.CreateIntCast(Res, ResType, IsSigned); // A negative \p IdxInst or \p CountedByInst means that the index lands // outside of the flexible array member. If that's the case, we want to // return 0. Value *Cmp = Builder.CreateIsNotNeg(CountedByInst); if (IdxInst) Cmp = Builder.CreateAnd(Builder.CreateIsNotNeg(IdxInst), Cmp); return Builder.CreateSelect(Cmp, Res, ConstantInt::get(ResType, 0, IsSigned)); } /// Returns a Value corresponding to the size of the given expression. /// This Value may be either of the following: /// - A llvm::Argument (if E is a param with the pass_object_size attribute on /// it) /// - A call to the @llvm.objectsize intrinsic /// /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null /// and we wouldn't otherwise try to reference a pass_object_size parameter, /// we'll call @llvm.objectsize on EmittedE, rather than emitting E. 
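// A typical pass_object_size case (illustrative):
//
//   void f(void *p __attribute__((pass_object_size(0)))) {
//     size_t n = __builtin_object_size(p, 0);
//   }
//
// Inside f, the builtin is answered from the implicit size parameter that the
// caller passes along with p, rather than by emitting @llvm.objectsize.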
llvm::Value * CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type, llvm::IntegerType *ResType, llvm::Value *EmittedE, bool IsDynamic) { // We need to reference an argument if the pointer is a parameter with the // pass_object_size attribute. if (auto *D = dyn_cast(E->IgnoreParenImpCasts())) { auto *Param = dyn_cast(D->getDecl()); auto *PS = D->getDecl()->getAttr(); if (Param != nullptr && PS != nullptr && areBOSTypesCompatible(PS->getType(), Type)) { auto Iter = SizeArguments.find(Param); assert(Iter != SizeArguments.end()); const ImplicitParamDecl *D = Iter->second; auto DIter = LocalDeclMap.find(D); assert(DIter != LocalDeclMap.end()); return EmitLoadOfScalar(DIter->second, /*Volatile=*/false, getContext().getSizeType(), E->getBeginLoc()); } } if (IsDynamic) { // Emit special code for a flexible array member with the "counted_by" // attribute. if (Value *V = emitFlexibleArrayMemberSize(E, Type, ResType)) return V; } // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't // evaluate E for side-effects. In either case, we shouldn't lower to // @llvm.objectsize. if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext()))) return getDefaultBuiltinObjectSizeResult(Type, ResType); Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E); assert(Ptr->getType()->isPointerTy() && "Non-pointer passed to __builtin_object_size?"); Function *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()}); // LLVM only supports 0 and 2, make sure that we pass along that as a boolean. Value *Min = Builder.getInt1((Type & 2) != 0); // For GCC compatibility, __builtin_object_size treat NULL as unknown size. Value *NullIsUnknown = Builder.getTrue(); Value *Dynamic = Builder.getInt1(IsDynamic); return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic}); } namespace { /// A struct to generically describe a bit test intrinsic. struct BitTest { enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set }; enum InterlockingKind : uint8_t { Unlocked, Sequential, Acquire, Release, NoFence }; ActionKind Action; InterlockingKind Interlocking; bool Is64Bit; static BitTest decodeBitTestBuiltin(unsigned BuiltinID); }; } // namespace BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) { switch (BuiltinID) { // Main portable variants. case Builtin::BI_bittest: return {TestOnly, Unlocked, false}; case Builtin::BI_bittestandcomplement: return {Complement, Unlocked, false}; case Builtin::BI_bittestandreset: return {Reset, Unlocked, false}; case Builtin::BI_bittestandset: return {Set, Unlocked, false}; case Builtin::BI_interlockedbittestandreset: return {Reset, Sequential, false}; case Builtin::BI_interlockedbittestandset: return {Set, Sequential, false}; // X86-specific 64-bit variants. case Builtin::BI_bittest64: return {TestOnly, Unlocked, true}; case Builtin::BI_bittestandcomplement64: return {Complement, Unlocked, true}; case Builtin::BI_bittestandreset64: return {Reset, Unlocked, true}; case Builtin::BI_bittestandset64: return {Set, Unlocked, true}; case Builtin::BI_interlockedbittestandreset64: return {Reset, Sequential, true}; case Builtin::BI_interlockedbittestandset64: return {Set, Sequential, true}; // ARM/AArch64-specific ordering variants. 
case Builtin::BI_interlockedbittestandset_acq: return {Set, Acquire, false}; case Builtin::BI_interlockedbittestandset_rel: return {Set, Release, false}; case Builtin::BI_interlockedbittestandset_nf: return {Set, NoFence, false}; case Builtin::BI_interlockedbittestandreset_acq: return {Reset, Acquire, false}; case Builtin::BI_interlockedbittestandreset_rel: return {Reset, Release, false}; case Builtin::BI_interlockedbittestandreset_nf: return {Reset, NoFence, false}; } llvm_unreachable("expected only bittest intrinsics"); } static char bitActionToX86BTCode(BitTest::ActionKind A) { switch (A) { case BitTest::TestOnly: return '\0'; case BitTest::Complement: return 'c'; case BitTest::Reset: return 'r'; case BitTest::Set: return 's'; } llvm_unreachable("invalid action"); } static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos) { char Action = bitActionToX86BTCode(BT.Action); char SizeSuffix = BT.Is64Bit ? 'q' : 'l'; // Build the assembly. SmallString<64> Asm; raw_svector_ostream AsmOS(Asm); if (BT.Interlocking != BitTest::Unlocked) AsmOS << "lock "; AsmOS << "bt"; if (Action) AsmOS << Action; AsmOS << SizeSuffix << " $2, ($1)"; // Build the constraints. FIXME: We should support immediates when possible. std::string Constraints = "={@ccc},r,r,~{cc},~{memory}"; std::string_view MachineClobbers = CGF.getTarget().getClobbers(); if (!MachineClobbers.empty()) { Constraints += ','; Constraints += MachineClobbers; } llvm::IntegerType *IntType = llvm::IntegerType::get( CGF.getLLVMContext(), CGF.getContext().getTypeSize(E->getArg(1)->getType())); llvm::FunctionType *FTy = llvm::FunctionType::get(CGF.Int8Ty, {CGF.UnqualPtrTy, IntType}, false); llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); return CGF.Builder.CreateCall(IA, {BitBase, BitPos}); } static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I) { switch (I) { case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic; case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent; case BitTest::Acquire: return llvm::AtomicOrdering::Acquire; case BitTest::Release: return llvm::AtomicOrdering::Release; case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic; } llvm_unreachable("invalid interlocking"); } /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of /// bits and a bit position and read and optionally modify the bit at that /// position. The position index can be arbitrarily large, i.e. it can be larger /// than 31 or 63, so we need an indexed load in the general case. static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E) { Value *BitBase = CGF.EmitScalarExpr(E->getArg(0)); Value *BitPos = CGF.EmitScalarExpr(E->getArg(1)); BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID); // X86 has special BT, BTC, BTR, and BTS instructions that handle the array // indexing operation internally. Use them if possible. if (CGF.getTarget().getTriple().isX86()) return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos); // Otherwise, use generic code to load one byte and test the bit. Use all but // the bottom three bits as the array index, and the bottom three bits to form // a mask. 
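// For example, BitPos = 70 reads BitBaseI8[70 >> 3] == BitBaseI8[8] and tests
// (or updates) bit 70 & 0x7 == 6 of that byte.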
// Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0; Value *ByteIndex = CGF.Builder.CreateAShr( BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx"); Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy); Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8, ByteIndex, "bittest.byteaddr"), CGF.Int8Ty, CharUnits::One()); Value *PosLow = CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty), llvm::ConstantInt::get(CGF.Int8Ty, 0x7)); // The updating instructions will need a mask. Value *Mask = nullptr; if (BT.Action != BitTest::TestOnly) { Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow, "bittest.mask"); } // Check the action and ordering of the interlocked intrinsics. llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking); Value *OldByte = nullptr; if (Ordering != llvm::AtomicOrdering::NotAtomic) { // Emit a combined atomicrmw load/store operation for the interlocked // intrinsics. llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or; if (BT.Action == BitTest::Reset) { Mask = CGF.Builder.CreateNot(Mask); RMWOp = llvm::AtomicRMWInst::And; } OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering); } else { // Emit a plain load for the non-interlocked intrinsics. OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte"); Value *NewByte = nullptr; switch (BT.Action) { case BitTest::TestOnly: // Don't store anything. break; case BitTest::Complement: NewByte = CGF.Builder.CreateXor(OldByte, Mask); break; case BitTest::Reset: NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask)); break; case BitTest::Set: NewByte = CGF.Builder.CreateOr(OldByte, Mask); break; } if (NewByte) CGF.Builder.CreateStore(NewByte, ByteAddr); } // However we loaded the old byte, either by plain load or atomicrmw, shift // the bit into the low position and mask it to 0 or 1. Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr"); return CGF.Builder.CreateAnd( ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res"); } static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E) { Value *Addr = CGF.EmitScalarExpr(E->getArg(0)); SmallString<64> Asm; raw_svector_ostream AsmOS(Asm); llvm::IntegerType *RetType = CGF.Int32Ty; switch (BuiltinID) { case clang::PPC::BI__builtin_ppc_ldarx: AsmOS << "ldarx "; RetType = CGF.Int64Ty; break; case clang::PPC::BI__builtin_ppc_lwarx: AsmOS << "lwarx "; RetType = CGF.Int32Ty; break; case clang::PPC::BI__builtin_ppc_lharx: AsmOS << "lharx "; RetType = CGF.Int16Ty; break; case clang::PPC::BI__builtin_ppc_lbarx: AsmOS << "lbarx "; RetType = CGF.Int8Ty; break; default: llvm_unreachable("Expected only PowerPC load reserve intrinsics"); } AsmOS << "$0, ${1:y}"; std::string Constraints = "=r,*Z,~{memory}"; std::string_view MachineClobbers = CGF.getTarget().getClobbers(); if (!MachineClobbers.empty()) { Constraints += ','; Constraints += MachineClobbers; } llvm::Type *PtrType = CGF.UnqualPtrTy; llvm::FunctionType *FTy = llvm::FunctionType::get(RetType, {PtrType}, false); llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); llvm::CallInst *CI = CGF.Builder.CreateCall(IA, {Addr}); CI->addParamAttr( 0, Attribute::get(CGF.getLLVMContext(), Attribute::ElementType, RetType)); return CI; } namespace { enum class MSVCSetJmpKind { _setjmpex, _setjmp3, _setjmp }; } /// MSVC handles setjmp a bit differently on different platforms. 
On every /// architecture except 32-bit x86, the frame address is passed. On x86, extra /// parameters can be passed as variadic arguments, but we always pass none. static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E) { llvm::Value *Arg1 = nullptr; llvm::Type *Arg1Ty = nullptr; StringRef Name; bool IsVarArg = false; if (SJKind == MSVCSetJmpKind::_setjmp3) { Name = "_setjmp3"; Arg1Ty = CGF.Int32Ty; Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0); IsVarArg = true; } else { Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex"; Arg1Ty = CGF.Int8PtrTy; if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) { Arg1 = CGF.Builder.CreateCall( CGF.CGM.getIntrinsic(Intrinsic::sponentry, CGF.AllocaInt8PtrTy)); } else Arg1 = CGF.Builder.CreateCall( CGF.CGM.getIntrinsic(Intrinsic::frameaddress, CGF.AllocaInt8PtrTy), llvm::ConstantInt::get(CGF.Int32Ty, 0)); } // Mark the call site and declaration with ReturnsTwice. llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty}; llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get( CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::ReturnsTwice); llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction( llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name, ReturnsTwiceAttr, /*Local=*/true); llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast( CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy); llvm::Value *Args[] = {Buf, Arg1}; llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args); CB->setAttributes(ReturnsTwiceAttr); return RValue::get(CB); } // Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code, // we handle them here. enum class CodeGenFunction::MSVCIntrin { _BitScanForward, _BitScanReverse, _InterlockedAnd, _InterlockedDecrement, _InterlockedExchange, _InterlockedExchangeAdd, _InterlockedExchangeSub, _InterlockedIncrement, _InterlockedOr, _InterlockedXor, _InterlockedExchangeAdd_acq, _InterlockedExchangeAdd_rel, _InterlockedExchangeAdd_nf, _InterlockedExchange_acq, _InterlockedExchange_rel, _InterlockedExchange_nf, _InterlockedCompareExchange_acq, _InterlockedCompareExchange_rel, _InterlockedCompareExchange_nf, _InterlockedCompareExchange128, _InterlockedCompareExchange128_acq, _InterlockedCompareExchange128_rel, _InterlockedCompareExchange128_nf, _InterlockedOr_acq, _InterlockedOr_rel, _InterlockedOr_nf, _InterlockedXor_acq, _InterlockedXor_rel, _InterlockedXor_nf, _InterlockedAnd_acq, _InterlockedAnd_rel, _InterlockedAnd_nf, _InterlockedIncrement_acq, _InterlockedIncrement_rel, _InterlockedIncrement_nf, _InterlockedDecrement_acq, _InterlockedDecrement_rel, _InterlockedDecrement_nf, __fastfail, }; static std::optional translateArmToMsvcIntrin(unsigned BuiltinID) { using MSVCIntrin = CodeGenFunction::MSVCIntrin; switch (BuiltinID) { default: return std::nullopt; case clang::ARM::BI_BitScanForward: case clang::ARM::BI_BitScanForward64: return MSVCIntrin::_BitScanForward; case clang::ARM::BI_BitScanReverse: case clang::ARM::BI_BitScanReverse64: return MSVCIntrin::_BitScanReverse; case clang::ARM::BI_InterlockedAnd64: return MSVCIntrin::_InterlockedAnd; case clang::ARM::BI_InterlockedExchange64: return MSVCIntrin::_InterlockedExchange; case clang::ARM::BI_InterlockedExchangeAdd64: return MSVCIntrin::_InterlockedExchangeAdd; case clang::ARM::BI_InterlockedExchangeSub64: return MSVCIntrin::_InterlockedExchangeSub; case clang::ARM::BI_InterlockedOr64: return MSVCIntrin::_InterlockedOr; case clang::ARM::BI_InterlockedXor64: 
return MSVCIntrin::_InterlockedXor; case clang::ARM::BI_InterlockedDecrement64: return MSVCIntrin::_InterlockedDecrement; case clang::ARM::BI_InterlockedIncrement64: return MSVCIntrin::_InterlockedIncrement; case clang::ARM::BI_InterlockedExchangeAdd8_acq: case clang::ARM::BI_InterlockedExchangeAdd16_acq: case clang::ARM::BI_InterlockedExchangeAdd_acq: case clang::ARM::BI_InterlockedExchangeAdd64_acq: return MSVCIntrin::_InterlockedExchangeAdd_acq; case clang::ARM::BI_InterlockedExchangeAdd8_rel: case clang::ARM::BI_InterlockedExchangeAdd16_rel: case clang::ARM::BI_InterlockedExchangeAdd_rel: case clang::ARM::BI_InterlockedExchangeAdd64_rel: return MSVCIntrin::_InterlockedExchangeAdd_rel; case clang::ARM::BI_InterlockedExchangeAdd8_nf: case clang::ARM::BI_InterlockedExchangeAdd16_nf: case clang::ARM::BI_InterlockedExchangeAdd_nf: case clang::ARM::BI_InterlockedExchangeAdd64_nf: return MSVCIntrin::_InterlockedExchangeAdd_nf; case clang::ARM::BI_InterlockedExchange8_acq: case clang::ARM::BI_InterlockedExchange16_acq: case clang::ARM::BI_InterlockedExchange_acq: case clang::ARM::BI_InterlockedExchange64_acq: return MSVCIntrin::_InterlockedExchange_acq; case clang::ARM::BI_InterlockedExchange8_rel: case clang::ARM::BI_InterlockedExchange16_rel: case clang::ARM::BI_InterlockedExchange_rel: case clang::ARM::BI_InterlockedExchange64_rel: return MSVCIntrin::_InterlockedExchange_rel; case clang::ARM::BI_InterlockedExchange8_nf: case clang::ARM::BI_InterlockedExchange16_nf: case clang::ARM::BI_InterlockedExchange_nf: case clang::ARM::BI_InterlockedExchange64_nf: return MSVCIntrin::_InterlockedExchange_nf; case clang::ARM::BI_InterlockedCompareExchange8_acq: case clang::ARM::BI_InterlockedCompareExchange16_acq: case clang::ARM::BI_InterlockedCompareExchange_acq: case clang::ARM::BI_InterlockedCompareExchange64_acq: return MSVCIntrin::_InterlockedCompareExchange_acq; case clang::ARM::BI_InterlockedCompareExchange8_rel: case clang::ARM::BI_InterlockedCompareExchange16_rel: case clang::ARM::BI_InterlockedCompareExchange_rel: case clang::ARM::BI_InterlockedCompareExchange64_rel: return MSVCIntrin::_InterlockedCompareExchange_rel; case clang::ARM::BI_InterlockedCompareExchange8_nf: case clang::ARM::BI_InterlockedCompareExchange16_nf: case clang::ARM::BI_InterlockedCompareExchange_nf: case clang::ARM::BI_InterlockedCompareExchange64_nf: return MSVCIntrin::_InterlockedCompareExchange_nf; case clang::ARM::BI_InterlockedOr8_acq: case clang::ARM::BI_InterlockedOr16_acq: case clang::ARM::BI_InterlockedOr_acq: case clang::ARM::BI_InterlockedOr64_acq: return MSVCIntrin::_InterlockedOr_acq; case clang::ARM::BI_InterlockedOr8_rel: case clang::ARM::BI_InterlockedOr16_rel: case clang::ARM::BI_InterlockedOr_rel: case clang::ARM::BI_InterlockedOr64_rel: return MSVCIntrin::_InterlockedOr_rel; case clang::ARM::BI_InterlockedOr8_nf: case clang::ARM::BI_InterlockedOr16_nf: case clang::ARM::BI_InterlockedOr_nf: case clang::ARM::BI_InterlockedOr64_nf: return MSVCIntrin::_InterlockedOr_nf; case clang::ARM::BI_InterlockedXor8_acq: case clang::ARM::BI_InterlockedXor16_acq: case clang::ARM::BI_InterlockedXor_acq: case clang::ARM::BI_InterlockedXor64_acq: return MSVCIntrin::_InterlockedXor_acq; case clang::ARM::BI_InterlockedXor8_rel: case clang::ARM::BI_InterlockedXor16_rel: case clang::ARM::BI_InterlockedXor_rel: case clang::ARM::BI_InterlockedXor64_rel: return MSVCIntrin::_InterlockedXor_rel; case clang::ARM::BI_InterlockedXor8_nf: case clang::ARM::BI_InterlockedXor16_nf: case clang::ARM::BI_InterlockedXor_nf: case 
clang::ARM::BI_InterlockedXor64_nf: return MSVCIntrin::_InterlockedXor_nf; case clang::ARM::BI_InterlockedAnd8_acq: case clang::ARM::BI_InterlockedAnd16_acq: case clang::ARM::BI_InterlockedAnd_acq: case clang::ARM::BI_InterlockedAnd64_acq: return MSVCIntrin::_InterlockedAnd_acq; case clang::ARM::BI_InterlockedAnd8_rel: case clang::ARM::BI_InterlockedAnd16_rel: case clang::ARM::BI_InterlockedAnd_rel: case clang::ARM::BI_InterlockedAnd64_rel: return MSVCIntrin::_InterlockedAnd_rel; case clang::ARM::BI_InterlockedAnd8_nf: case clang::ARM::BI_InterlockedAnd16_nf: case clang::ARM::BI_InterlockedAnd_nf: case clang::ARM::BI_InterlockedAnd64_nf: return MSVCIntrin::_InterlockedAnd_nf; case clang::ARM::BI_InterlockedIncrement16_acq: case clang::ARM::BI_InterlockedIncrement_acq: case clang::ARM::BI_InterlockedIncrement64_acq: return MSVCIntrin::_InterlockedIncrement_acq; case clang::ARM::BI_InterlockedIncrement16_rel: case clang::ARM::BI_InterlockedIncrement_rel: case clang::ARM::BI_InterlockedIncrement64_rel: return MSVCIntrin::_InterlockedIncrement_rel; case clang::ARM::BI_InterlockedIncrement16_nf: case clang::ARM::BI_InterlockedIncrement_nf: case clang::ARM::BI_InterlockedIncrement64_nf: return MSVCIntrin::_InterlockedIncrement_nf; case clang::ARM::BI_InterlockedDecrement16_acq: case clang::ARM::BI_InterlockedDecrement_acq: case clang::ARM::BI_InterlockedDecrement64_acq: return MSVCIntrin::_InterlockedDecrement_acq; case clang::ARM::BI_InterlockedDecrement16_rel: case clang::ARM::BI_InterlockedDecrement_rel: case clang::ARM::BI_InterlockedDecrement64_rel: return MSVCIntrin::_InterlockedDecrement_rel; case clang::ARM::BI_InterlockedDecrement16_nf: case clang::ARM::BI_InterlockedDecrement_nf: case clang::ARM::BI_InterlockedDecrement64_nf: return MSVCIntrin::_InterlockedDecrement_nf; } llvm_unreachable("must return from switch"); } static std::optional translateAarch64ToMsvcIntrin(unsigned BuiltinID) { using MSVCIntrin = CodeGenFunction::MSVCIntrin; switch (BuiltinID) { default: return std::nullopt; case clang::AArch64::BI_BitScanForward: case clang::AArch64::BI_BitScanForward64: return MSVCIntrin::_BitScanForward; case clang::AArch64::BI_BitScanReverse: case clang::AArch64::BI_BitScanReverse64: return MSVCIntrin::_BitScanReverse; case clang::AArch64::BI_InterlockedAnd64: return MSVCIntrin::_InterlockedAnd; case clang::AArch64::BI_InterlockedExchange64: return MSVCIntrin::_InterlockedExchange; case clang::AArch64::BI_InterlockedExchangeAdd64: return MSVCIntrin::_InterlockedExchangeAdd; case clang::AArch64::BI_InterlockedExchangeSub64: return MSVCIntrin::_InterlockedExchangeSub; case clang::AArch64::BI_InterlockedOr64: return MSVCIntrin::_InterlockedOr; case clang::AArch64::BI_InterlockedXor64: return MSVCIntrin::_InterlockedXor; case clang::AArch64::BI_InterlockedDecrement64: return MSVCIntrin::_InterlockedDecrement; case clang::AArch64::BI_InterlockedIncrement64: return MSVCIntrin::_InterlockedIncrement; case clang::AArch64::BI_InterlockedExchangeAdd8_acq: case clang::AArch64::BI_InterlockedExchangeAdd16_acq: case clang::AArch64::BI_InterlockedExchangeAdd_acq: case clang::AArch64::BI_InterlockedExchangeAdd64_acq: return MSVCIntrin::_InterlockedExchangeAdd_acq; case clang::AArch64::BI_InterlockedExchangeAdd8_rel: case clang::AArch64::BI_InterlockedExchangeAdd16_rel: case clang::AArch64::BI_InterlockedExchangeAdd_rel: case clang::AArch64::BI_InterlockedExchangeAdd64_rel: return MSVCIntrin::_InterlockedExchangeAdd_rel; case clang::AArch64::BI_InterlockedExchangeAdd8_nf: case 
clang::AArch64::BI_InterlockedExchangeAdd16_nf: case clang::AArch64::BI_InterlockedExchangeAdd_nf: case clang::AArch64::BI_InterlockedExchangeAdd64_nf: return MSVCIntrin::_InterlockedExchangeAdd_nf; case clang::AArch64::BI_InterlockedExchange8_acq: case clang::AArch64::BI_InterlockedExchange16_acq: case clang::AArch64::BI_InterlockedExchange_acq: case clang::AArch64::BI_InterlockedExchange64_acq: return MSVCIntrin::_InterlockedExchange_acq; case clang::AArch64::BI_InterlockedExchange8_rel: case clang::AArch64::BI_InterlockedExchange16_rel: case clang::AArch64::BI_InterlockedExchange_rel: case clang::AArch64::BI_InterlockedExchange64_rel: return MSVCIntrin::_InterlockedExchange_rel; case clang::AArch64::BI_InterlockedExchange8_nf: case clang::AArch64::BI_InterlockedExchange16_nf: case clang::AArch64::BI_InterlockedExchange_nf: case clang::AArch64::BI_InterlockedExchange64_nf: return MSVCIntrin::_InterlockedExchange_nf; case clang::AArch64::BI_InterlockedCompareExchange8_acq: case clang::AArch64::BI_InterlockedCompareExchange16_acq: case clang::AArch64::BI_InterlockedCompareExchange_acq: case clang::AArch64::BI_InterlockedCompareExchange64_acq: return MSVCIntrin::_InterlockedCompareExchange_acq; case clang::AArch64::BI_InterlockedCompareExchange8_rel: case clang::AArch64::BI_InterlockedCompareExchange16_rel: case clang::AArch64::BI_InterlockedCompareExchange_rel: case clang::AArch64::BI_InterlockedCompareExchange64_rel: return MSVCIntrin::_InterlockedCompareExchange_rel; case clang::AArch64::BI_InterlockedCompareExchange8_nf: case clang::AArch64::BI_InterlockedCompareExchange16_nf: case clang::AArch64::BI_InterlockedCompareExchange_nf: case clang::AArch64::BI_InterlockedCompareExchange64_nf: return MSVCIntrin::_InterlockedCompareExchange_nf; case clang::AArch64::BI_InterlockedCompareExchange128: return MSVCIntrin::_InterlockedCompareExchange128; case clang::AArch64::BI_InterlockedCompareExchange128_acq: return MSVCIntrin::_InterlockedCompareExchange128_acq; case clang::AArch64::BI_InterlockedCompareExchange128_nf: return MSVCIntrin::_InterlockedCompareExchange128_nf; case clang::AArch64::BI_InterlockedCompareExchange128_rel: return MSVCIntrin::_InterlockedCompareExchange128_rel; case clang::AArch64::BI_InterlockedOr8_acq: case clang::AArch64::BI_InterlockedOr16_acq: case clang::AArch64::BI_InterlockedOr_acq: case clang::AArch64::BI_InterlockedOr64_acq: return MSVCIntrin::_InterlockedOr_acq; case clang::AArch64::BI_InterlockedOr8_rel: case clang::AArch64::BI_InterlockedOr16_rel: case clang::AArch64::BI_InterlockedOr_rel: case clang::AArch64::BI_InterlockedOr64_rel: return MSVCIntrin::_InterlockedOr_rel; case clang::AArch64::BI_InterlockedOr8_nf: case clang::AArch64::BI_InterlockedOr16_nf: case clang::AArch64::BI_InterlockedOr_nf: case clang::AArch64::BI_InterlockedOr64_nf: return MSVCIntrin::_InterlockedOr_nf; case clang::AArch64::BI_InterlockedXor8_acq: case clang::AArch64::BI_InterlockedXor16_acq: case clang::AArch64::BI_InterlockedXor_acq: case clang::AArch64::BI_InterlockedXor64_acq: return MSVCIntrin::_InterlockedXor_acq; case clang::AArch64::BI_InterlockedXor8_rel: case clang::AArch64::BI_InterlockedXor16_rel: case clang::AArch64::BI_InterlockedXor_rel: case clang::AArch64::BI_InterlockedXor64_rel: return MSVCIntrin::_InterlockedXor_rel; case clang::AArch64::BI_InterlockedXor8_nf: case clang::AArch64::BI_InterlockedXor16_nf: case clang::AArch64::BI_InterlockedXor_nf: case clang::AArch64::BI_InterlockedXor64_nf: return MSVCIntrin::_InterlockedXor_nf; case 
clang::AArch64::BI_InterlockedAnd8_acq: case clang::AArch64::BI_InterlockedAnd16_acq: case clang::AArch64::BI_InterlockedAnd_acq: case clang::AArch64::BI_InterlockedAnd64_acq: return MSVCIntrin::_InterlockedAnd_acq; case clang::AArch64::BI_InterlockedAnd8_rel: case clang::AArch64::BI_InterlockedAnd16_rel: case clang::AArch64::BI_InterlockedAnd_rel: case clang::AArch64::BI_InterlockedAnd64_rel: return MSVCIntrin::_InterlockedAnd_rel; case clang::AArch64::BI_InterlockedAnd8_nf: case clang::AArch64::BI_InterlockedAnd16_nf: case clang::AArch64::BI_InterlockedAnd_nf: case clang::AArch64::BI_InterlockedAnd64_nf: return MSVCIntrin::_InterlockedAnd_nf; case clang::AArch64::BI_InterlockedIncrement16_acq: case clang::AArch64::BI_InterlockedIncrement_acq: case clang::AArch64::BI_InterlockedIncrement64_acq: return MSVCIntrin::_InterlockedIncrement_acq; case clang::AArch64::BI_InterlockedIncrement16_rel: case clang::AArch64::BI_InterlockedIncrement_rel: case clang::AArch64::BI_InterlockedIncrement64_rel: return MSVCIntrin::_InterlockedIncrement_rel; case clang::AArch64::BI_InterlockedIncrement16_nf: case clang::AArch64::BI_InterlockedIncrement_nf: case clang::AArch64::BI_InterlockedIncrement64_nf: return MSVCIntrin::_InterlockedIncrement_nf; case clang::AArch64::BI_InterlockedDecrement16_acq: case clang::AArch64::BI_InterlockedDecrement_acq: case clang::AArch64::BI_InterlockedDecrement64_acq: return MSVCIntrin::_InterlockedDecrement_acq; case clang::AArch64::BI_InterlockedDecrement16_rel: case clang::AArch64::BI_InterlockedDecrement_rel: case clang::AArch64::BI_InterlockedDecrement64_rel: return MSVCIntrin::_InterlockedDecrement_rel; case clang::AArch64::BI_InterlockedDecrement16_nf: case clang::AArch64::BI_InterlockedDecrement_nf: case clang::AArch64::BI_InterlockedDecrement64_nf: return MSVCIntrin::_InterlockedDecrement_nf; } llvm_unreachable("must return from switch"); } static std::optional translateX86ToMsvcIntrin(unsigned BuiltinID) { using MSVCIntrin = CodeGenFunction::MSVCIntrin; switch (BuiltinID) { default: return std::nullopt; case clang::X86::BI_BitScanForward: case clang::X86::BI_BitScanForward64: return MSVCIntrin::_BitScanForward; case clang::X86::BI_BitScanReverse: case clang::X86::BI_BitScanReverse64: return MSVCIntrin::_BitScanReverse; case clang::X86::BI_InterlockedAnd64: return MSVCIntrin::_InterlockedAnd; case clang::X86::BI_InterlockedCompareExchange128: return MSVCIntrin::_InterlockedCompareExchange128; case clang::X86::BI_InterlockedExchange64: return MSVCIntrin::_InterlockedExchange; case clang::X86::BI_InterlockedExchangeAdd64: return MSVCIntrin::_InterlockedExchangeAdd; case clang::X86::BI_InterlockedExchangeSub64: return MSVCIntrin::_InterlockedExchangeSub; case clang::X86::BI_InterlockedOr64: return MSVCIntrin::_InterlockedOr; case clang::X86::BI_InterlockedXor64: return MSVCIntrin::_InterlockedXor; case clang::X86::BI_InterlockedDecrement64: return MSVCIntrin::_InterlockedDecrement; case clang::X86::BI_InterlockedIncrement64: return MSVCIntrin::_InterlockedIncrement; } llvm_unreachable("must return from switch"); } // Emit an MSVC intrinsic. Assumes that arguments have *not* been evaluated. 
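// For reference, the _BitScanForward/_BitScanReverse cases below follow the
// documented MSVC contract: when the mask is zero they return 0 and leave the
// index output untouched; otherwise they store the zero-based position of the
// lowest (respectively highest) set bit through the index pointer and return 1.
// The zero and non-zero paths are merged with a PHI, using cttz/ctlz to compute
// the bit position.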
Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID, const CallExpr *E) { switch (BuiltinID) { case MSVCIntrin::_BitScanForward: case MSVCIntrin::_BitScanReverse: { Address IndexAddress(EmitPointerWithAlignment(E->getArg(0))); Value *ArgValue = EmitScalarExpr(E->getArg(1)); llvm::Type *ArgType = ArgValue->getType(); llvm::Type *IndexType = IndexAddress.getElementType(); llvm::Type *ResultType = ConvertType(E->getType()); Value *ArgZero = llvm::Constant::getNullValue(ArgType); Value *ResZero = llvm::Constant::getNullValue(ResultType); Value *ResOne = llvm::ConstantInt::get(ResultType, 1); BasicBlock *Begin = Builder.GetInsertBlock(); BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn); Builder.SetInsertPoint(End); PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result"); Builder.SetInsertPoint(Begin); Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero); BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn); Builder.CreateCondBr(IsZero, End, NotZero); Result->addIncoming(ResZero, Begin); Builder.SetInsertPoint(NotZero); if (BuiltinID == MSVCIntrin::_BitScanForward) { Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); Builder.CreateStore(ZeroCount, IndexAddress, false); } else { unsigned ArgWidth = cast(ArgType)->getBitWidth(); Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1); Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()}); ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false); Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount); Builder.CreateStore(Index, IndexAddress, false); } Builder.CreateBr(End); Result->addIncoming(ResOne, NotZero); Builder.SetInsertPoint(End); return Result; } case MSVCIntrin::_InterlockedAnd: return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E); case MSVCIntrin::_InterlockedExchange: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E); case MSVCIntrin::_InterlockedExchangeAdd: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E); case MSVCIntrin::_InterlockedExchangeSub: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E); case MSVCIntrin::_InterlockedOr: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E); case MSVCIntrin::_InterlockedXor: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E); case MSVCIntrin::_InterlockedExchangeAdd_acq: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, AtomicOrdering::Acquire); case MSVCIntrin::_InterlockedExchangeAdd_rel: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, AtomicOrdering::Release); case MSVCIntrin::_InterlockedExchangeAdd_nf: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, AtomicOrdering::Monotonic); case MSVCIntrin::_InterlockedExchange_acq: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, AtomicOrdering::Acquire); case MSVCIntrin::_InterlockedExchange_rel: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, AtomicOrdering::Release); case MSVCIntrin::_InterlockedExchange_nf: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, AtomicOrdering::Monotonic); case MSVCIntrin::_InterlockedCompareExchange_acq: return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire); case MSVCIntrin::_InterlockedCompareExchange_rel: return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release); 
case MSVCIntrin::_InterlockedCompareExchange_nf: return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic); case MSVCIntrin::_InterlockedCompareExchange128: return EmitAtomicCmpXchg128ForMSIntrin( *this, E, AtomicOrdering::SequentiallyConsistent); case MSVCIntrin::_InterlockedCompareExchange128_acq: return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire); case MSVCIntrin::_InterlockedCompareExchange128_rel: return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release); case MSVCIntrin::_InterlockedCompareExchange128_nf: return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic); case MSVCIntrin::_InterlockedOr_acq: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, AtomicOrdering::Acquire); case MSVCIntrin::_InterlockedOr_rel: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, AtomicOrdering::Release); case MSVCIntrin::_InterlockedOr_nf: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, AtomicOrdering::Monotonic); case MSVCIntrin::_InterlockedXor_acq: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, AtomicOrdering::Acquire); case MSVCIntrin::_InterlockedXor_rel: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, AtomicOrdering::Release); case MSVCIntrin::_InterlockedXor_nf: return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E, AtomicOrdering::Monotonic); case MSVCIntrin::_InterlockedAnd_acq: return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, AtomicOrdering::Acquire); case MSVCIntrin::_InterlockedAnd_rel: return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, AtomicOrdering::Release); case MSVCIntrin::_InterlockedAnd_nf: return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, AtomicOrdering::Monotonic); case MSVCIntrin::_InterlockedIncrement_acq: return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire); case MSVCIntrin::_InterlockedIncrement_rel: return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release); case MSVCIntrin::_InterlockedIncrement_nf: return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic); case MSVCIntrin::_InterlockedDecrement_acq: return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire); case MSVCIntrin::_InterlockedDecrement_rel: return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release); case MSVCIntrin::_InterlockedDecrement_nf: return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic); case MSVCIntrin::_InterlockedDecrement: return EmitAtomicDecrementValue(*this, E); case MSVCIntrin::_InterlockedIncrement: return EmitAtomicIncrementValue(*this, E); case MSVCIntrin::__fastfail: { // Request immediate process termination from the kernel. 
The instruction // sequences to do this are documented on MSDN: // https://msdn.microsoft.com/en-us/library/dn774154.aspx llvm::Triple::ArchType ISA = getTarget().getTriple().getArch(); StringRef Asm, Constraints; switch (ISA) { default: ErrorUnsupported(E, "__fastfail call for this architecture"); break; case llvm::Triple::x86: case llvm::Triple::x86_64: Asm = "int $$0x29"; Constraints = "{cx}"; break; case llvm::Triple::thumb: Asm = "udf #251"; Constraints = "{r0}"; break; case llvm::Triple::aarch64: Asm = "brk #0xF003"; Constraints = "{w0}"; } llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false); llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true); llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0))); CI->setAttributes(NoReturnAttr); return CI; } } llvm_unreachable("Incorrect MSVC intrinsic!"); } namespace { // ARC cleanup for __builtin_os_log_format struct CallObjCArcUse final : EHScopeStack::Cleanup { CallObjCArcUse(llvm::Value *object) : object(object) {} llvm::Value *object; void Emit(CodeGenFunction &CGF, Flags flags) override { CGF.EmitARCIntrinsicUse(object); } }; } Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind) { assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) && "Unsupported builtin check kind"); Value *ArgValue = EmitScalarExpr(E); if (!SanOpts.has(SanitizerKind::Builtin)) return ArgValue; SanitizerScope SanScope(this); Value *Cond = Builder.CreateICmpNE( ArgValue, llvm::Constant::getNullValue(ArgValue->getType())); EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin), SanitizerHandler::InvalidBuiltin, {EmitCheckSourceLocation(E->getExprLoc()), llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)}, std::nullopt); return ArgValue; } static Value *EmitAbs(CodeGenFunction &CGF, Value *ArgValue, bool HasNSW) { return CGF.Builder.CreateBinaryIntrinsic( Intrinsic::abs, ArgValue, ConstantInt::get(CGF.Builder.getInt1Ty(), HasNSW)); } static Value *EmitOverflowCheckedAbs(CodeGenFunction &CGF, const CallExpr *E, bool SanitizeOverflow) { Value *ArgValue = CGF.EmitScalarExpr(E->getArg(0)); // Try to eliminate overflow check. if (const auto *VCI = dyn_cast(ArgValue)) { if (!VCI->isMinSignedValue()) return EmitAbs(CGF, ArgValue, true); } CodeGenFunction::SanitizerScope SanScope(&CGF); Constant *Zero = Constant::getNullValue(ArgValue->getType()); Value *ResultAndOverflow = CGF.Builder.CreateBinaryIntrinsic( Intrinsic::ssub_with_overflow, Zero, ArgValue); Value *Result = CGF.Builder.CreateExtractValue(ResultAndOverflow, 0); Value *NotOverflow = CGF.Builder.CreateNot( CGF.Builder.CreateExtractValue(ResultAndOverflow, 1)); // TODO: support -ftrapv-handler. if (SanitizeOverflow) { CGF.EmitCheck({{NotOverflow, SanitizerKind::SignedIntegerOverflow}}, SanitizerHandler::NegateOverflow, {CGF.EmitCheckSourceLocation(E->getArg(0)->getExprLoc()), CGF.EmitCheckTypeDescriptor(E->getType())}, {ArgValue}); } else CGF.EmitTrapCheck(NotOverflow, SanitizerHandler::SubOverflow); Value *CmpResult = CGF.Builder.CreateICmpSLT(ArgValue, Zero, "abscond"); return CGF.Builder.CreateSelect(CmpResult, Result, ArgValue, "abs"); } /// Get the argument type for arguments to os_log_helper. 
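// For example, Size == 4 maps to the canonical 'unsigned int' type on targets
// where int is 32 bits wide (illustrative; the exact type is whatever
// getIntTypeForBitwidth picks for the requested width).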
static CanQualType getOSLogArgType(ASTContext &C, int Size) { QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false); return C.getCanonicalType(UnsignedTy); } llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction( const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment) { ASTContext &Ctx = getContext(); llvm::SmallString<64> Name; { raw_svector_ostream OS(Name); OS << "__os_log_helper"; OS << "_" << BufferAlignment.getQuantity(); OS << "_" << int(Layout.getSummaryByte()); OS << "_" << int(Layout.getNumArgsByte()); for (const auto &Item : Layout.Items) OS << "_" << int(Item.getSizeByte()) << "_" << int(Item.getDescriptorByte()); } if (llvm::Function *F = CGM.getModule().getFunction(Name)) return F; llvm::SmallVector ArgTys; FunctionArgList Args; Args.push_back(ImplicitParamDecl::Create( Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy, ImplicitParamKind::Other)); ArgTys.emplace_back(Ctx.VoidPtrTy); for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) { char Size = Layout.Items[I].getSizeByte(); if (!Size) continue; QualType ArgTy = getOSLogArgType(Ctx, Size); Args.push_back(ImplicitParamDecl::Create( Ctx, nullptr, SourceLocation(), &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy, ImplicitParamKind::Other)); ArgTys.emplace_back(ArgTy); } QualType ReturnTy = Ctx.VoidTy; // The helper function has linkonce_odr linkage to enable the linker to merge // identical functions. To ensure the merging always happens, 'noinline' is // attached to the function when compiling with -Oz. const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args); llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *Fn = llvm::Function::Create( FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule()); Fn->setVisibility(llvm::GlobalValue::HiddenVisibility); CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn, /*IsThunk=*/false); CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn); Fn->setDoesNotThrow(); // Attach 'noinline' at -Oz. if (CGM.getCodeGenOpts().OptimizeSize == 2) Fn->addFnAttr(llvm::Attribute::NoInline); auto NL = ApplyDebugLocation::CreateEmpty(*this); StartFunction(GlobalDecl(), ReturnTy, Fn, FI, Args); // Create a scope with an artificial location for the body of this function. 
auto AL = ApplyDebugLocation::CreateArtificial(*this); CharUnits Offset; Address BufAddr = makeNaturalAddressForPointer( Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"), Ctx.VoidTy, BufferAlignment); Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()), Builder.CreateConstByteGEP(BufAddr, Offset++, "summary")); Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()), Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs")); unsigned I = 1; for (const auto &Item : Layout.Items) { Builder.CreateStore( Builder.getInt8(Item.getDescriptorByte()), Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor")); Builder.CreateStore( Builder.getInt8(Item.getSizeByte()), Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize")); CharUnits Size = Item.size(); if (!Size.getQuantity()) continue; Address Arg = GetAddrOfLocalVar(Args[I]); Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData"); Addr = Addr.withElementType(Arg.getElementType()); Builder.CreateStore(Builder.CreateLoad(Arg), Addr); Offset += Size; ++I; } FinishFunction(); return Fn; } RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) { assert(E.getNumArgs() >= 2 && "__builtin_os_log_format takes at least 2 arguments"); ASTContext &Ctx = getContext(); analyze_os_log::OSLogBufferLayout Layout; analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout); Address BufAddr = EmitPointerWithAlignment(E.getArg(0)); llvm::SmallVector RetainableOperands; // Ignore argument 1, the format string. It is not currently used. CallArgList Args; Args.add(RValue::get(BufAddr.emitRawPointer(*this)), Ctx.VoidPtrTy); for (const auto &Item : Layout.Items) { int Size = Item.getSizeByte(); if (!Size) continue; llvm::Value *ArgVal; if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) { uint64_t Val = 0; for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I) Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8; ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val)); } else if (const Expr *TheExpr = Item.getExpr()) { ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false); // If a temporary object that requires destruction after the full // expression is passed, push a lifetime-extended cleanup to extend its // lifetime to the end of the enclosing block scope. auto LifetimeExtendObject = [&](const Expr *E) { E = E->IgnoreParenCasts(); // Extend lifetimes of objects returned by function calls and message // sends. // FIXME: We should do this in other cases in which temporaries are // created including arguments of non-ARC types (e.g., C++ // temporaries). if (isa(E) || isa(E)) return true; return false; }; if (TheExpr->getType()->isObjCRetainableType() && getLangOpts().ObjCAutoRefCount && LifetimeExtendObject(TheExpr)) { assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar && "Only scalar can be a ObjC retainable type"); if (!isa(ArgVal)) { CleanupKind Cleanup = getARCCleanupKind(); QualType Ty = TheExpr->getType(); RawAddress Alloca = RawAddress::invalid(); RawAddress Addr = CreateMemTemp(Ty, "os.log.arg", &Alloca); ArgVal = EmitARCRetain(Ty, ArgVal); Builder.CreateStore(ArgVal, Addr); pushLifetimeExtendedDestroy(Cleanup, Alloca, Ty, CodeGenFunction::destroyARCStrongPrecise, Cleanup & EHCleanup); // Push a clang.arc.use call to ensure ARC optimizer knows that the // argument has to be alive. 
if (CGM.getCodeGenOpts().OptimizationLevel != 0) pushCleanupAfterFullExpr(Cleanup, ArgVal); } } } else { ArgVal = Builder.getInt32(Item.getConstValue().getQuantity()); } unsigned ArgValSize = CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType()); llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(), ArgValSize); ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy); CanQualType ArgTy = getOSLogArgType(Ctx, Size); // If ArgVal has type x86_fp80, zero-extend ArgVal. ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy)); Args.add(RValue::get(ArgVal), ArgTy); } const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args); llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction( Layout, BufAddr.getAlignment()); EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args); return RValue::get(BufAddr, *this); } static bool isSpecialUnsignedMultiplySignedResult( unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo) { return BuiltinID == Builtin::BI__builtin_mul_overflow && Op1Info.Width == Op2Info.Width && Op2Info.Width == ResultInfo.Width && !Op1Info.Signed && !Op2Info.Signed && ResultInfo.Signed; } static RValue EmitCheckedUnsignedMultiplySignedResult( CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo) { assert(isSpecialUnsignedMultiplySignedResult( Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) && "Cannot specialize this multiply"); llvm::Value *V1 = CGF.EmitScalarExpr(Op1); llvm::Value *V2 = CGF.EmitScalarExpr(Op2); llvm::Value *HasOverflow; llvm::Value *Result = EmitOverflowIntrinsic( CGF, llvm::Intrinsic::umul_with_overflow, V1, V2, HasOverflow); // The intrinsic call will detect overflow when the value is > UINT_MAX, // however, since the original builtin had a signed result, we need to report // an overflow when the result is greater than INT_MAX. auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width); llvm::Value *IntMaxValue = llvm::ConstantInt::get(Result->getType(), IntMax); llvm::Value *IntMaxOverflow = CGF.Builder.CreateICmpUGT(Result, IntMaxValue); HasOverflow = CGF.Builder.CreateOr(HasOverflow, IntMaxOverflow); bool isVolatile = ResultArg->getType()->getPointeeType().isVolatileQualified(); Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); return RValue::get(HasOverflow); } /// Determine if a binop is a checked mixed-sign multiply we can specialize. static bool isSpecialMixedSignMultiply(unsigned BuiltinID, WidthAndSignedness Op1Info, WidthAndSignedness Op2Info, WidthAndSignedness ResultInfo) { return BuiltinID == Builtin::BI__builtin_mul_overflow && std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width && Op1Info.Signed != Op2Info.Signed; } /// Emit a checked mixed-sign multiply. This is a cheaper specialization of /// the generic checked-binop irgen. 
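// Illustrative sketch, not from the clang sources: the source-level behavior
// the specialization below must preserve for a signed*unsigned multiply with a
// signed result of the same width (the chosen values are examples only; the
// snippet is kept out of the build with #if 0).
#if 0
#include <cassert>
#include <climits>

int main() {
  int Res;
  // signed * unsigned with a signed result: overflow is judged against the
  // signed result range, so 2 * 0x40000000u == 2^31 does not fit in 'int'.
  bool Ovf = __builtin_mul_overflow(2, 0x40000000u, &Res);
  assert(Ovf);
  // -2 * 0x40000000u == INT_MIN does fit, so no overflow is reported.
  Ovf = __builtin_mul_overflow(-2, 0x40000000u, &Res);
  assert(!Ovf && Res == INT_MIN);
  return 0;
}
#endif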
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo) { assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info, Op2Info, ResultInfo) && "Not a mixed-sign multipliction we can specialize"); // Emit the signed and unsigned operands. const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2; const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1; llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp); llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp); unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width; unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width; // One of the operands may be smaller than the other. If so, [s|z]ext it. if (SignedOpWidth < UnsignedOpWidth) Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext"); if (UnsignedOpWidth < SignedOpWidth) Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext"); llvm::Type *OpTy = Signed->getType(); llvm::Value *Zero = llvm::Constant::getNullValue(OpTy); Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg); llvm::Type *ResTy = ResultPtr.getElementType(); unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width); // Take the absolute value of the signed operand. llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero); llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed); llvm::Value *AbsSigned = CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed); // Perform a checked unsigned multiplication. llvm::Value *UnsignedOverflow; llvm::Value *UnsignedResult = EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned, Unsigned, UnsignedOverflow); llvm::Value *Overflow, *Result; if (ResultInfo.Signed) { // Signed overflow occurs if the result is greater than INT_MAX or lesser // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative). auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width).zext(OpWidth); llvm::Value *MaxResult = CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax), CGF.Builder.CreateZExt(IsNegative, OpTy)); llvm::Value *SignedOverflow = CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult); Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow); // Prepare the signed result (possibly by negating it). llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult); llvm::Value *SignedResult = CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult); Result = CGF.Builder.CreateTrunc(SignedResult, ResTy); } else { // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX. llvm::Value *Underflow = CGF.Builder.CreateAnd( IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult)); Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow); if (ResultInfo.Width < OpWidth) { auto IntMax = llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth); llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT( UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax)); Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow); } // Negate the product if it would be negative in infinite precision. 
    Result = CGF.Builder.CreateSelect(
        IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
    Result = CGF.Builder.CreateTrunc(Result, ResTy);
  }
  assert(Overflow && Result && "Missing overflow or result");

  bool isVolatile =
      ResultArg->getType()->getPointeeType().isVolatileQualified();
  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
                          isVolatile);
  return RValue::get(Overflow);
}

static bool
TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
                              llvm::SmallPtrSetImpl<const RecordDecl *> &Seen) {
  if (const auto *Arr = Ctx.getAsArrayType(Ty))
    Ty = Ctx.getBaseElementType(Arr);

  const auto *Record = Ty->getAsCXXRecordDecl();
  if (!Record)
    return false;

  // We've already checked this type, or are in the process of checking it.
  if (!Seen.insert(Record).second)
    return false;

  assert(Record->hasDefinition() &&
         "Incomplete types should already be diagnosed");

  if (Record->isDynamicClass())
    return true;

  for (FieldDecl *F : Record->fields()) {
    if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
      return true;
  }
  return false;
}

/// Determine if the specified type requires laundering by checking if it is a
/// dynamic class type or contains a subobject which is a dynamic class type.
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
  if (!CGM.getCodeGenOpts().StrictVTablePointers)
    return false;
  llvm::SmallPtrSet<const RecordDecl *, 16> Seen;
  return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
}

RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
  llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));

  // The builtin's shift arg may have a different type than the source arg and
  // result, but the LLVM intrinsic uses the same type for all values.
  llvm::Type *Ty = Src->getType();
  ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);

  // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
  unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Function *F = CGM.getIntrinsic(IID, Ty);
  return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
}

// Map math builtins for long-double to f128 version.
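// Illustrative sketch, not from the clang sources: the mutation below only
// fires on PPC64 targets whose 'long double' is IEEE binary128 (for example
// powerpc64le Linux with -mabi=ieeelongdouble); the function name is an
// example only.
#if 0
long double quad_sqrt(long double X) {
  // On such targets this call is handled as __builtin_sqrtf128(X), so the
  // later lowering sees an f128 operation (typically a sqrtf128 libcall)
  // rather than one on the legacy ppc_fp128 type.
  return __builtin_sqrtl(X);
}
#endif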
static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) { switch (BuiltinID) { #define MUTATE_LDBL(func) \ case Builtin::BI__builtin_##func##l: \ return Builtin::BI__builtin_##func##f128; MUTATE_LDBL(sqrt) MUTATE_LDBL(cbrt) MUTATE_LDBL(fabs) MUTATE_LDBL(log) MUTATE_LDBL(log2) MUTATE_LDBL(log10) MUTATE_LDBL(log1p) MUTATE_LDBL(logb) MUTATE_LDBL(exp) MUTATE_LDBL(exp2) MUTATE_LDBL(expm1) MUTATE_LDBL(fdim) MUTATE_LDBL(hypot) MUTATE_LDBL(ilogb) MUTATE_LDBL(pow) MUTATE_LDBL(fmin) MUTATE_LDBL(fmax) MUTATE_LDBL(ceil) MUTATE_LDBL(trunc) MUTATE_LDBL(rint) MUTATE_LDBL(nearbyint) MUTATE_LDBL(round) MUTATE_LDBL(floor) MUTATE_LDBL(lround) MUTATE_LDBL(llround) MUTATE_LDBL(lrint) MUTATE_LDBL(llrint) MUTATE_LDBL(fmod) MUTATE_LDBL(modf) MUTATE_LDBL(nan) MUTATE_LDBL(nans) MUTATE_LDBL(inf) MUTATE_LDBL(fma) MUTATE_LDBL(sin) MUTATE_LDBL(cos) MUTATE_LDBL(tan) MUTATE_LDBL(sinh) MUTATE_LDBL(cosh) MUTATE_LDBL(tanh) MUTATE_LDBL(asin) MUTATE_LDBL(acos) MUTATE_LDBL(atan) MUTATE_LDBL(asinh) MUTATE_LDBL(acosh) MUTATE_LDBL(atanh) MUTATE_LDBL(atan2) MUTATE_LDBL(erf) MUTATE_LDBL(erfc) MUTATE_LDBL(ldexp) MUTATE_LDBL(frexp) MUTATE_LDBL(huge_val) MUTATE_LDBL(copysign) MUTATE_LDBL(nextafter) MUTATE_LDBL(nexttoward) MUTATE_LDBL(remainder) MUTATE_LDBL(remquo) MUTATE_LDBL(scalbln) MUTATE_LDBL(scalbn) MUTATE_LDBL(tgamma) MUTATE_LDBL(lgamma) #undef MUTATE_LDBL default: return BuiltinID; } } static Value *tryUseTestFPKind(CodeGenFunction &CGF, unsigned BuiltinID, Value *V) { if (CGF.Builder.getIsFPConstrained() && CGF.Builder.getDefaultConstrainedExcept() != fp::ebIgnore) { if (Value *Result = CGF.getTargetHooks().testFPKind(V, BuiltinID, CGF.Builder, CGF.CGM)) return Result; } return nullptr; } static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF, const FunctionDecl *FD) { auto Name = FD->getNameAsString() + "__hipstdpar_unsupported"; auto FnTy = CGF->CGM.getTypes().GetFunctionType(FD); auto UBF = CGF->CGM.getModule().getOrInsertFunction(Name, FnTy); SmallVector Args; for (auto &&FormalTy : FnTy->params()) Args.push_back(llvm::PoisonValue::get(FormalTy)); return RValue::get(CGF->Builder.CreateCall(UBF, Args)); } RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { const FunctionDecl *FD = GD.getDecl()->getAsFunction(); // See if we can constant fold this builtin. If so, don't emit it at all. // TODO: Extend this handling to all builtin calls that we can constant-fold. Expr::EvalResult Result; if (E->isPRValue() && E->EvaluateAsRValue(Result, CGM.getContext()) && !Result.hasSideEffects()) { if (Result.Val.isInt()) return RValue::get(llvm::ConstantInt::get(getLLVMContext(), Result.Val.getInt())); if (Result.Val.isFloat()) return RValue::get(llvm::ConstantFP::get(getLLVMContext(), Result.Val.getFloat())); } // If current long-double semantics is IEEE 128-bit, replace math builtins // of long-double with f128 equivalent. // TODO: This mutation should also be applied to other targets other than PPC, // after backend supports IEEE 128-bit style libcalls. if (getTarget().getTriple().isPPC64() && &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) BuiltinID = mutateLongDoubleBuiltin(BuiltinID); // If the builtin has been declared explicitly with an assembler label, // disable the specialized emitting below. Ideally we should communicate the // rename in IR, or at least avoid generating the intrinsic calls that are // likely to get lowered to the renamed library functions. const unsigned BuiltinIDIfNoAsmLabel = FD->hasAttr() ? 
0 : BuiltinID; std::optional ErrnoOverriden; // ErrnoOverriden is true if math-errno is overriden via the // '#pragma float_control(precise, on)'. This pragma disables fast-math, // which implies math-errno. if (E->hasStoredFPFeatures()) { FPOptionsOverride OP = E->getFPFeatures(); if (OP.hasMathErrnoOverride()) ErrnoOverriden = OP.getMathErrnoOverride(); } // True if 'attribute__((optnone))' is used. This attribute overrides // fast-math which implies math-errno. bool OptNone = CurFuncDecl && CurFuncDecl->hasAttr(); // True if we are compiling at -O2 and errno has been disabled // using the '#pragma float_control(precise, off)', and // attribute opt-none hasn't been seen. bool ErrnoOverridenToFalseWithOpt = ErrnoOverriden.has_value() && !ErrnoOverriden.value() && !OptNone && CGM.getCodeGenOpts().OptimizationLevel != 0; // There are LLVM math intrinsics/instructions corresponding to math library // functions except the LLVM op will never set errno while the math library // might. Also, math builtins have the same semantics as their math library // twins. Thus, we can transform math library and builtin calls to their // LLVM counterparts if the call is marked 'const' (known to never set errno). // In case FP exceptions are enabled, the experimental versions of the // intrinsics model those. bool ConstAlways = getContext().BuiltinInfo.isConst(BuiltinID); // There's a special case with the fma builtins where they are always const // if the target environment is GNU or the target is OS is Windows and we're // targeting the MSVCRT.dll environment. // FIXME: This list can be become outdated. Need to find a way to get it some // other way. switch (BuiltinID) { case Builtin::BI__builtin_fma: case Builtin::BI__builtin_fmaf: case Builtin::BI__builtin_fmal: case Builtin::BI__builtin_fmaf16: case Builtin::BIfma: case Builtin::BIfmaf: case Builtin::BIfmal: { auto &Trip = CGM.getTriple(); if (Trip.isGNUEnvironment() || Trip.isOSMSVCRT()) ConstAlways = true; break; } default: break; } bool ConstWithoutErrnoAndExceptions = getContext().BuiltinInfo.isConstWithoutErrnoAndExceptions(BuiltinID); bool ConstWithoutExceptions = getContext().BuiltinInfo.isConstWithoutExceptions(BuiltinID); // ConstAttr is enabled in fast-math mode. In fast-math mode, math-errno is // disabled. // Math intrinsics are generated only when math-errno is disabled. Any pragmas // or attributes that affect math-errno should prevent or allow math // intrincs to be generated. Intrinsics are generated: // 1- In fast math mode, unless math-errno is overriden // via '#pragma float_control(precise, on)', or via an // 'attribute__((optnone))'. // 2- If math-errno was enabled on command line but overriden // to false via '#pragma float_control(precise, off))' and // 'attribute__((optnone))' hasn't been used. // 3- If we are compiling with optimization and errno has been disabled // via '#pragma float_control(precise, off)', and // 'attribute__((optnone))' hasn't been used. 
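// Illustrative sketch, not from the clang sources: a source-level view of the
// cases enumerated above, assuming a -O2 compile with math-errno enabled by
// default; function names are examples only.
#if 0
double keeps_libcall(double X) {
  return __builtin_sqrt(X); // errno may be set, so no llvm.sqrt here
}

double gets_intrinsic(double X) {
#pragma float_control(precise, off) // errno treated as disabled (case 3)
  return __builtin_sqrt(X); // eligible for llvm.sqrt at -O2
}

__attribute__((optnone)) double optnone_keeps_libcall(double X) {
#pragma float_control(precise, off)
  return __builtin_sqrt(X); // optnone suppresses the intrinsic
}
#endif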
bool ConstWithoutErrnoOrExceptions = ConstWithoutErrnoAndExceptions || ConstWithoutExceptions; bool GenerateIntrinsics = (ConstAlways && !OptNone) || (!getLangOpts().MathErrno && !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone); if (!GenerateIntrinsics) { GenerateIntrinsics = ConstWithoutErrnoOrExceptions && !ConstWithoutErrnoAndExceptions; if (!GenerateIntrinsics) GenerateIntrinsics = ConstWithoutErrnoOrExceptions && (!getLangOpts().MathErrno && !(ErrnoOverriden.has_value() && ErrnoOverriden.value()) && !OptNone); if (!GenerateIntrinsics) GenerateIntrinsics = ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt; } if (GenerateIntrinsics) { switch (BuiltinIDIfNoAsmLabel) { case Builtin::BIacos: case Builtin::BIacosf: case Builtin::BIacosl: case Builtin::BI__builtin_acos: case Builtin::BI__builtin_acosf: case Builtin::BI__builtin_acosf16: case Builtin::BI__builtin_acosl: case Builtin::BI__builtin_acosf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::acos, Intrinsic::experimental_constrained_acos)); case Builtin::BIasin: case Builtin::BIasinf: case Builtin::BIasinl: case Builtin::BI__builtin_asin: case Builtin::BI__builtin_asinf: case Builtin::BI__builtin_asinf16: case Builtin::BI__builtin_asinl: case Builtin::BI__builtin_asinf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::asin, Intrinsic::experimental_constrained_asin)); case Builtin::BIatan: case Builtin::BIatanf: case Builtin::BIatanl: case Builtin::BI__builtin_atan: case Builtin::BI__builtin_atanf: case Builtin::BI__builtin_atanf16: case Builtin::BI__builtin_atanl: case Builtin::BI__builtin_atanf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::atan, Intrinsic::experimental_constrained_atan)); case Builtin::BIceil: case Builtin::BIceilf: case Builtin::BIceill: case Builtin::BI__builtin_ceil: case Builtin::BI__builtin_ceilf: case Builtin::BI__builtin_ceilf16: case Builtin::BI__builtin_ceill: case Builtin::BI__builtin_ceilf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::ceil, Intrinsic::experimental_constrained_ceil)); case Builtin::BIcopysign: case Builtin::BIcopysignf: case Builtin::BIcopysignl: case Builtin::BI__builtin_copysign: case Builtin::BI__builtin_copysignf: case Builtin::BI__builtin_copysignf16: case Builtin::BI__builtin_copysignl: case Builtin::BI__builtin_copysignf128: return RValue::get( emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::copysign)); case Builtin::BIcos: case Builtin::BIcosf: case Builtin::BIcosl: case Builtin::BI__builtin_cos: case Builtin::BI__builtin_cosf: case Builtin::BI__builtin_cosf16: case Builtin::BI__builtin_cosl: case Builtin::BI__builtin_cosf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::cos, Intrinsic::experimental_constrained_cos)); case Builtin::BIcosh: case Builtin::BIcoshf: case Builtin::BIcoshl: case Builtin::BI__builtin_cosh: case Builtin::BI__builtin_coshf: case Builtin::BI__builtin_coshf16: case Builtin::BI__builtin_coshl: case Builtin::BI__builtin_coshf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::cosh, Intrinsic::experimental_constrained_cosh)); case Builtin::BIexp: case Builtin::BIexpf: case Builtin::BIexpl: case Builtin::BI__builtin_exp: case Builtin::BI__builtin_expf: case Builtin::BI__builtin_expf16: case Builtin::BI__builtin_expl: case Builtin::BI__builtin_expf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::exp, 
Intrinsic::experimental_constrained_exp)); case Builtin::BIexp2: case Builtin::BIexp2f: case Builtin::BIexp2l: case Builtin::BI__builtin_exp2: case Builtin::BI__builtin_exp2f: case Builtin::BI__builtin_exp2f16: case Builtin::BI__builtin_exp2l: case Builtin::BI__builtin_exp2f128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::exp2, Intrinsic::experimental_constrained_exp2)); case Builtin::BI__builtin_exp10: case Builtin::BI__builtin_exp10f: case Builtin::BI__builtin_exp10f16: case Builtin::BI__builtin_exp10l: case Builtin::BI__builtin_exp10f128: { // TODO: strictfp support if (Builder.getIsFPConstrained()) break; return RValue::get( emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::exp10)); } case Builtin::BIfabs: case Builtin::BIfabsf: case Builtin::BIfabsl: case Builtin::BI__builtin_fabs: case Builtin::BI__builtin_fabsf: case Builtin::BI__builtin_fabsf16: case Builtin::BI__builtin_fabsl: case Builtin::BI__builtin_fabsf128: return RValue::get( emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::fabs)); case Builtin::BIfloor: case Builtin::BIfloorf: case Builtin::BIfloorl: case Builtin::BI__builtin_floor: case Builtin::BI__builtin_floorf: case Builtin::BI__builtin_floorf16: case Builtin::BI__builtin_floorl: case Builtin::BI__builtin_floorf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::floor, Intrinsic::experimental_constrained_floor)); case Builtin::BIfma: case Builtin::BIfmaf: case Builtin::BIfmal: case Builtin::BI__builtin_fma: case Builtin::BI__builtin_fmaf: case Builtin::BI__builtin_fmaf16: case Builtin::BI__builtin_fmal: case Builtin::BI__builtin_fmaf128: return RValue::get(emitTernaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::fma, Intrinsic::experimental_constrained_fma)); case Builtin::BIfmax: case Builtin::BIfmaxf: case Builtin::BIfmaxl: case Builtin::BI__builtin_fmax: case Builtin::BI__builtin_fmaxf: case Builtin::BI__builtin_fmaxf16: case Builtin::BI__builtin_fmaxl: case Builtin::BI__builtin_fmaxf128: return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::maxnum, Intrinsic::experimental_constrained_maxnum)); case Builtin::BIfmin: case Builtin::BIfminf: case Builtin::BIfminl: case Builtin::BI__builtin_fmin: case Builtin::BI__builtin_fminf: case Builtin::BI__builtin_fminf16: case Builtin::BI__builtin_fminl: case Builtin::BI__builtin_fminf128: return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::minnum, Intrinsic::experimental_constrained_minnum)); // fmod() is a special-case. It maps to the frem instruction rather than an // LLVM intrinsic. 
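// Illustrative sketch, not from the clang sources: assuming the intrinsic path
// is taken (e.g. -fno-math-errno), the call below becomes a single 'frem'
// instruction rather than an fmod() libcall; the function name is an example.
#if 0
double rem(double X, double Y) {
  return __builtin_fmod(X, Y); // emitted as: %fmod = frem double %X, %Y
}
#endif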
case Builtin::BIfmod: case Builtin::BIfmodf: case Builtin::BIfmodl: case Builtin::BI__builtin_fmod: case Builtin::BI__builtin_fmodf: case Builtin::BI__builtin_fmodf16: case Builtin::BI__builtin_fmodl: case Builtin::BI__builtin_fmodf128: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *Arg1 = EmitScalarExpr(E->getArg(0)); Value *Arg2 = EmitScalarExpr(E->getArg(1)); return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod")); } case Builtin::BIlog: case Builtin::BIlogf: case Builtin::BIlogl: case Builtin::BI__builtin_log: case Builtin::BI__builtin_logf: case Builtin::BI__builtin_logf16: case Builtin::BI__builtin_logl: case Builtin::BI__builtin_logf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log, Intrinsic::experimental_constrained_log)); case Builtin::BIlog10: case Builtin::BIlog10f: case Builtin::BIlog10l: case Builtin::BI__builtin_log10: case Builtin::BI__builtin_log10f: case Builtin::BI__builtin_log10f16: case Builtin::BI__builtin_log10l: case Builtin::BI__builtin_log10f128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log10, Intrinsic::experimental_constrained_log10)); case Builtin::BIlog2: case Builtin::BIlog2f: case Builtin::BIlog2l: case Builtin::BI__builtin_log2: case Builtin::BI__builtin_log2f: case Builtin::BI__builtin_log2f16: case Builtin::BI__builtin_log2l: case Builtin::BI__builtin_log2f128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::log2, Intrinsic::experimental_constrained_log2)); case Builtin::BInearbyint: case Builtin::BInearbyintf: case Builtin::BInearbyintl: case Builtin::BI__builtin_nearbyint: case Builtin::BI__builtin_nearbyintf: case Builtin::BI__builtin_nearbyintl: case Builtin::BI__builtin_nearbyintf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::nearbyint, Intrinsic::experimental_constrained_nearbyint)); case Builtin::BIpow: case Builtin::BIpowf: case Builtin::BIpowl: case Builtin::BI__builtin_pow: case Builtin::BI__builtin_powf: case Builtin::BI__builtin_powf16: case Builtin::BI__builtin_powl: case Builtin::BI__builtin_powf128: return RValue::get(emitBinaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::pow, Intrinsic::experimental_constrained_pow)); case Builtin::BIrint: case Builtin::BIrintf: case Builtin::BIrintl: case Builtin::BI__builtin_rint: case Builtin::BI__builtin_rintf: case Builtin::BI__builtin_rintf16: case Builtin::BI__builtin_rintl: case Builtin::BI__builtin_rintf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::rint, Intrinsic::experimental_constrained_rint)); case Builtin::BIround: case Builtin::BIroundf: case Builtin::BIroundl: case Builtin::BI__builtin_round: case Builtin::BI__builtin_roundf: case Builtin::BI__builtin_roundf16: case Builtin::BI__builtin_roundl: case Builtin::BI__builtin_roundf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::round, Intrinsic::experimental_constrained_round)); case Builtin::BIroundeven: case Builtin::BIroundevenf: case Builtin::BIroundevenl: case Builtin::BI__builtin_roundeven: case Builtin::BI__builtin_roundevenf: case Builtin::BI__builtin_roundevenf16: case Builtin::BI__builtin_roundevenl: case Builtin::BI__builtin_roundevenf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::roundeven, Intrinsic::experimental_constrained_roundeven)); case Builtin::BIsin: case Builtin::BIsinf: case Builtin::BIsinl: case Builtin::BI__builtin_sin: case Builtin::BI__builtin_sinf: case 
Builtin::BI__builtin_sinf16: case Builtin::BI__builtin_sinl: case Builtin::BI__builtin_sinf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::sin, Intrinsic::experimental_constrained_sin)); case Builtin::BIsinh: case Builtin::BIsinhf: case Builtin::BIsinhl: case Builtin::BI__builtin_sinh: case Builtin::BI__builtin_sinhf: case Builtin::BI__builtin_sinhf16: case Builtin::BI__builtin_sinhl: case Builtin::BI__builtin_sinhf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::sinh, Intrinsic::experimental_constrained_sinh)); case Builtin::BIsqrt: case Builtin::BIsqrtf: case Builtin::BIsqrtl: case Builtin::BI__builtin_sqrt: case Builtin::BI__builtin_sqrtf: case Builtin::BI__builtin_sqrtf16: case Builtin::BI__builtin_sqrtl: case Builtin::BI__builtin_sqrtf128: case Builtin::BI__builtin_elementwise_sqrt: { llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt); SetSqrtFPAccuracy(Call); return RValue::get(Call); } case Builtin::BItan: case Builtin::BItanf: case Builtin::BItanl: case Builtin::BI__builtin_tan: case Builtin::BI__builtin_tanf: case Builtin::BI__builtin_tanf16: case Builtin::BI__builtin_tanl: case Builtin::BI__builtin_tanf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::tan, Intrinsic::experimental_constrained_tan)); case Builtin::BItanh: case Builtin::BItanhf: case Builtin::BItanhl: case Builtin::BI__builtin_tanh: case Builtin::BI__builtin_tanhf: case Builtin::BI__builtin_tanhf16: case Builtin::BI__builtin_tanhl: case Builtin::BI__builtin_tanhf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::tanh, Intrinsic::experimental_constrained_tanh)); case Builtin::BItrunc: case Builtin::BItruncf: case Builtin::BItruncl: case Builtin::BI__builtin_trunc: case Builtin::BI__builtin_truncf: case Builtin::BI__builtin_truncf16: case Builtin::BI__builtin_truncl: case Builtin::BI__builtin_truncf128: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, Intrinsic::trunc, Intrinsic::experimental_constrained_trunc)); case Builtin::BIlround: case Builtin::BIlroundf: case Builtin::BIlroundl: case Builtin::BI__builtin_lround: case Builtin::BI__builtin_lroundf: case Builtin::BI__builtin_lroundl: case Builtin::BI__builtin_lroundf128: return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( *this, E, Intrinsic::lround, Intrinsic::experimental_constrained_lround)); case Builtin::BIllround: case Builtin::BIllroundf: case Builtin::BIllroundl: case Builtin::BI__builtin_llround: case Builtin::BI__builtin_llroundf: case Builtin::BI__builtin_llroundl: case Builtin::BI__builtin_llroundf128: return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( *this, E, Intrinsic::llround, Intrinsic::experimental_constrained_llround)); case Builtin::BIlrint: case Builtin::BIlrintf: case Builtin::BIlrintl: case Builtin::BI__builtin_lrint: case Builtin::BI__builtin_lrintf: case Builtin::BI__builtin_lrintl: case Builtin::BI__builtin_lrintf128: return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( *this, E, Intrinsic::lrint, Intrinsic::experimental_constrained_lrint)); case Builtin::BIllrint: case Builtin::BIllrintf: case Builtin::BIllrintl: case Builtin::BI__builtin_llrint: case Builtin::BI__builtin_llrintf: case Builtin::BI__builtin_llrintl: case Builtin::BI__builtin_llrintf128: return RValue::get(emitMaybeConstrainedFPToIntRoundBuiltin( *this, E, Intrinsic::llrint, Intrinsic::experimental_constrained_llrint)); case 
Builtin::BI__builtin_ldexp: case Builtin::BI__builtin_ldexpf: case Builtin::BI__builtin_ldexpl: case Builtin::BI__builtin_ldexpf16: case Builtin::BI__builtin_ldexpf128: { return RValue::get(emitBinaryExpMaybeConstrainedFPBuiltin( *this, E, Intrinsic::ldexp, Intrinsic::experimental_constrained_ldexp)); } default: break; } } // Check NonnullAttribute/NullabilityArg and Alignment. auto EmitArgCheck = [&](TypeCheckKind Kind, Address A, const Expr *Arg, unsigned ParmNum) { Value *Val = A.emitRawPointer(*this); EmitNonNullArgCheck(RValue::get(Val), Arg->getType(), Arg->getExprLoc(), FD, ParmNum); if (SanOpts.has(SanitizerKind::Alignment)) { SanitizerSet SkippedChecks; SkippedChecks.set(SanitizerKind::All); SkippedChecks.clear(SanitizerKind::Alignment); SourceLocation Loc = Arg->getExprLoc(); // Strip an implicit cast. if (auto *CE = dyn_cast(Arg)) if (CE->getCastKind() == CK_BitCast) Arg = CE->getSubExpr(); EmitTypeCheck(Kind, Loc, Val, Arg->getType(), A.getAlignment(), SkippedChecks); } }; switch (BuiltinIDIfNoAsmLabel) { default: break; case Builtin::BI__builtin___CFStringMakeConstantString: case Builtin::BI__builtin___NSStringMakeConstantString: return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); case Builtin::BI__builtin_stdarg_start: case Builtin::BI__builtin_va_start: case Builtin::BI__va_start: case Builtin::BI__builtin_va_end: EmitVAStartEnd(BuiltinID == Builtin::BI__va_start ? EmitScalarExpr(E->getArg(0)) : EmitVAListRef(E->getArg(0)).emitRawPointer(*this), BuiltinID != Builtin::BI__builtin_va_end); return RValue::get(nullptr); case Builtin::BI__builtin_va_copy: { Value *DstPtr = EmitVAListRef(E->getArg(0)).emitRawPointer(*this); Value *SrcPtr = EmitVAListRef(E->getArg(1)).emitRawPointer(*this); Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy, {DstPtr->getType()}), {DstPtr, SrcPtr}); return RValue::get(nullptr); } case Builtin::BIabs: case Builtin::BIlabs: case Builtin::BIllabs: case Builtin::BI__builtin_abs: case Builtin::BI__builtin_labs: case Builtin::BI__builtin_llabs: { bool SanitizeOverflow = SanOpts.has(SanitizerKind::SignedIntegerOverflow); Value *Result; switch (getLangOpts().getSignedOverflowBehavior()) { case LangOptions::SOB_Defined: Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), false); break; case LangOptions::SOB_Undefined: if (!SanitizeOverflow) { Result = EmitAbs(*this, EmitScalarExpr(E->getArg(0)), true); break; } [[fallthrough]]; case LangOptions::SOB_Trapping: // TODO: Somehow handle the corner case when the address of abs is taken. 
Result = EmitOverflowCheckedAbs(*this, E, SanitizeOverflow); break; } return RValue::get(Result); } case Builtin::BI__builtin_complex: { Value *Real = EmitScalarExpr(E->getArg(0)); Value *Imag = EmitScalarExpr(E->getArg(1)); return RValue::getComplex({Real, Imag}); } case Builtin::BI__builtin_conj: case Builtin::BI__builtin_conjf: case Builtin::BI__builtin_conjl: case Builtin::BIconj: case Builtin::BIconjf: case Builtin::BIconjl: { ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); Value *Real = ComplexVal.first; Value *Imag = ComplexVal.second; Imag = Builder.CreateFNeg(Imag, "neg"); return RValue::getComplex(std::make_pair(Real, Imag)); } case Builtin::BI__builtin_creal: case Builtin::BI__builtin_crealf: case Builtin::BI__builtin_creall: case Builtin::BIcreal: case Builtin::BIcrealf: case Builtin::BIcreall: { ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); return RValue::get(ComplexVal.first); } case Builtin::BI__builtin_preserve_access_index: { // Only enabled preserved access index region when debuginfo // is available as debuginfo is needed to preserve user-level // access pattern. if (!getDebugInfo()) { CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g"); return RValue::get(EmitScalarExpr(E->getArg(0))); } // Nested builtin_preserve_access_index() not supported if (IsInPreservedAIRegion) { CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported"); return RValue::get(EmitScalarExpr(E->getArg(0))); } IsInPreservedAIRegion = true; Value *Res = EmitScalarExpr(E->getArg(0)); IsInPreservedAIRegion = false; return RValue::get(Res); } case Builtin::BI__builtin_cimag: case Builtin::BI__builtin_cimagf: case Builtin::BI__builtin_cimagl: case Builtin::BIcimag: case Builtin::BIcimagf: case Builtin::BIcimagl: { ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); return RValue::get(ComplexVal.second); } case Builtin::BI__builtin_clrsb: case Builtin::BI__builtin_clrsbl: case Builtin::BI__builtin_clrsbll: { // clrsb(x) -> clz(x < 0 ? ~x : x) - 1 or Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Zero = llvm::Constant::getNullValue(ArgType); Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg"); Value *Inverse = Builder.CreateNot(ArgValue, "not"); Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue); Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()}); Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1)); Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, "cast"); return RValue::get(Result); } case Builtin::BI__builtin_ctzs: case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: case Builtin::BI__builtin_ctzll: case Builtin::BI__builtin_ctzg: { bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_ctzg && E->getNumArgs() > 1; Value *ArgValue = HasFallback ? 
EmitScalarExpr(E->getArg(0)) : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *ZeroUndef = Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef()); Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast"); if (!HasFallback) return RValue::get(Result); Value *Zero = Constant::getNullValue(ArgType); Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); Value *FallbackValue = EmitScalarExpr(E->getArg(1)); Value *ResultOrFallback = Builder.CreateSelect(IsZero, FallbackValue, Result, "ctzg"); return RValue::get(ResultOrFallback); } case Builtin::BI__builtin_clzs: case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: case Builtin::BI__builtin_clzll: case Builtin::BI__builtin_clzg: { bool HasFallback = BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_clzg && E->getNumArgs() > 1; Value *ArgValue = HasFallback ? EmitScalarExpr(E->getArg(0)) : EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *ZeroUndef = Builder.getInt1(HasFallback || getTarget().isCLZForZeroUndef()); Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast"); if (!HasFallback) return RValue::get(Result); Value *Zero = Constant::getNullValue(ArgType); Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); Value *FallbackValue = EmitScalarExpr(E->getArg(1)); Value *ResultOrFallback = Builder.CreateSelect(IsZero, FallbackValue, Result, "clzg"); return RValue::get(ResultOrFallback); } case Builtin::BI__builtin_ffs: case Builtin::BI__builtin_ffsl: case Builtin::BI__builtin_ffsll: { // ffs(x) -> x ? 
cttz(x) + 1 : 0 Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), llvm::ConstantInt::get(ArgType, 1)); Value *Zero = llvm::Constant::getNullValue(ArgType); Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero"); Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs"); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, "cast"); return RValue::get(Result); } case Builtin::BI__builtin_parity: case Builtin::BI__builtin_parityl: case Builtin::BI__builtin_parityll: { // parity(x) -> ctpop(x) & 1 Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Tmp = Builder.CreateCall(F, ArgValue); Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, "cast"); return RValue::get(Result); } case Builtin::BI__lzcnt16: case Builtin::BI__lzcnt: case Builtin::BI__lzcnt64: { Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()}); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, "cast"); return RValue::get(Result); } case Builtin::BI__popcnt16: case Builtin::BI__popcnt: case Builtin::BI__popcnt64: case Builtin::BI__builtin_popcount: case Builtin::BI__builtin_popcountl: case Builtin::BI__builtin_popcountll: case Builtin::BI__builtin_popcountg: { Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateCall(F, ArgValue); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/ false, "cast"); return RValue::get(Result); } case Builtin::BI__builtin_unpredictable: { // Always return the argument of __builtin_unpredictable. LLVM does not // handle this builtin. Metadata for this builtin should be added directly // to instructions such as branches or switches that use it. return RValue::get(EmitScalarExpr(E->getArg(0))); } case Builtin::BI__builtin_expect: { Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); // Don't generate llvm.expect on -O0 as the backend won't use it for // anything. // Note, we still IRGen ExpectedValue because it could have side-effects. 
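// Illustrative sketch, not from the clang sources: the second argument is
// still evaluated at -O0 even though no llvm.expect is emitted; names are
// examples only.
#if 0
extern int counter();
int likely_value(int X) {
  // At -O0 this simply yields X, but counter() is still called once.
  return __builtin_expect(X, counter());
}
#endif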
if (CGM.getCodeGenOpts().OptimizationLevel == 0) return RValue::get(ArgValue); Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType); Value *Result = Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval"); return RValue::get(Result); } case Builtin::BI__builtin_expect_with_probability: { Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Value *ExpectedValue = EmitScalarExpr(E->getArg(1)); llvm::APFloat Probability(0.0); const Expr *ProbArg = E->getArg(2); bool EvalSucceed = ProbArg->EvaluateAsFloat(Probability, CGM.getContext()); assert(EvalSucceed && "probability should be able to evaluate as float"); (void)EvalSucceed; bool LoseInfo = false; Probability.convert(llvm::APFloat::IEEEdouble(), llvm::RoundingMode::Dynamic, &LoseInfo); llvm::Type *Ty = ConvertType(ProbArg->getType()); Constant *Confidence = ConstantFP::get(Ty, Probability); // Don't generate llvm.expect.with.probability on -O0 as the backend // won't use it for anything. // Note, we still IRGen ExpectedValue because it could have side-effects. if (CGM.getCodeGenOpts().OptimizationLevel == 0) return RValue::get(ArgValue); Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect_with_probability, ArgType); Value *Result = Builder.CreateCall( FnExpect, {ArgValue, ExpectedValue, Confidence}, "expval"); return RValue::get(Result); } case Builtin::BI__builtin_assume_aligned: { const Expr *Ptr = E->getArg(0); Value *PtrValue = EmitScalarExpr(Ptr); Value *OffsetValue = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr; Value *AlignmentValue = EmitScalarExpr(E->getArg(1)); ConstantInt *AlignmentCI = cast(AlignmentValue); if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(), llvm::Value::MaximumAlignment); emitAlignmentAssumption(PtrValue, Ptr, /*The expr loc is sufficient.*/ SourceLocation(), AlignmentCI, OffsetValue); return RValue::get(PtrValue); } case Builtin::BI__assume: case Builtin::BI__builtin_assume: { if (E->getArg(0)->HasSideEffects(getContext())) return RValue::get(nullptr); Value *ArgValue = EmitScalarExpr(E->getArg(0)); Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume); Builder.CreateCall(FnAssume, ArgValue); return RValue::get(nullptr); } case Builtin::BI__builtin_assume_separate_storage: { const Expr *Arg0 = E->getArg(0); const Expr *Arg1 = E->getArg(1); Value *Value0 = EmitScalarExpr(Arg0); Value *Value1 = EmitScalarExpr(Arg1); Value *Values[] = {Value0, Value1}; OperandBundleDefT OBD("separate_storage", Values); Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD}); return RValue::get(nullptr); } case Builtin::BI__builtin_allow_runtime_check: { StringRef Kind = cast(E->getArg(0)->IgnoreParenCasts())->getString(); LLVMContext &Ctx = CGM.getLLVMContext(); llvm::Value *Allow = Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::allow_runtime_check), llvm::MetadataAsValue::get(Ctx, llvm::MDString::get(Ctx, Kind))); return RValue::get(Allow); } case Builtin::BI__arithmetic_fence: { // Create the builtin call if FastMath is selected, and the target // supports the builtin, otherwise just return the argument. 
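// Illustrative sketch, not from the clang sources: with reassociation enabled
// (e.g. -ffast-math) on a target that supports the fence, the grouping of
// (A + B) below is preserved; otherwise the argument is returned unchanged.
// The function name is an example only.
#if 0
float fenced_sum(float A, float B, float C) {
  return __arithmetic_fence(A + B) + C; // (A + B) is not reassociated with C
}
#endif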
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); llvm::FastMathFlags FMF = Builder.getFastMathFlags(); bool isArithmeticFenceEnabled = FMF.allowReassoc() && getContext().getTargetInfo().checkArithmeticFenceSupported(); QualType ArgType = E->getArg(0)->getType(); if (ArgType->isComplexType()) { if (isArithmeticFenceEnabled) { QualType ElementType = ArgType->castAs()->getElementType(); ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); Value *Real = Builder.CreateArithmeticFence(ComplexVal.first, ConvertType(ElementType)); Value *Imag = Builder.CreateArithmeticFence(ComplexVal.second, ConvertType(ElementType)); return RValue::getComplex(std::make_pair(Real, Imag)); } ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); Value *Real = ComplexVal.first; Value *Imag = ComplexVal.second; return RValue::getComplex(std::make_pair(Real, Imag)); } Value *ArgValue = EmitScalarExpr(E->getArg(0)); if (isArithmeticFenceEnabled) return RValue::get( Builder.CreateArithmeticFence(ArgValue, ConvertType(ArgType))); return RValue::get(ArgValue); } case Builtin::BI__builtin_bswap16: case Builtin::BI__builtin_bswap32: case Builtin::BI__builtin_bswap64: case Builtin::BI_byteswap_ushort: case Builtin::BI_byteswap_ulong: case Builtin::BI_byteswap_uint64: { return RValue::get( emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bswap)); } case Builtin::BI__builtin_bitreverse8: case Builtin::BI__builtin_bitreverse16: case Builtin::BI__builtin_bitreverse32: case Builtin::BI__builtin_bitreverse64: { return RValue::get( emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::bitreverse)); } case Builtin::BI__builtin_rotateleft8: case Builtin::BI__builtin_rotateleft16: case Builtin::BI__builtin_rotateleft32: case Builtin::BI__builtin_rotateleft64: case Builtin::BI_rotl8: // Microsoft variants of rotate left case Builtin::BI_rotl16: case Builtin::BI_rotl: case Builtin::BI_lrotl: case Builtin::BI_rotl64: return emitRotate(E, false); case Builtin::BI__builtin_rotateright8: case Builtin::BI__builtin_rotateright16: case Builtin::BI__builtin_rotateright32: case Builtin::BI__builtin_rotateright64: case Builtin::BI_rotr8: // Microsoft variants of rotate right case Builtin::BI_rotr16: case Builtin::BI_rotr: case Builtin::BI_lrotr: case Builtin::BI_rotr64: return emitRotate(E, true); case Builtin::BI__builtin_constant_p: { llvm::Type *ResultType = ConvertType(E->getType()); const Expr *Arg = E->getArg(0); QualType ArgType = Arg->getType(); // FIXME: The allowance for Obj-C pointers and block pointers is historical // and likely a mistake. if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() && !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType()) // Per the GCC documentation, only numeric constants are recognized after // inlining. return RValue::get(ConstantInt::get(ResultType, 0)); if (Arg->HasSideEffects(getContext())) // The argument is unevaluated, so be conservative if it might have // side-effects. return RValue::get(ConstantInt::get(ResultType, 0)); Value *ArgValue = EmitScalarExpr(Arg); if (ArgType->isObjCObjectPointerType()) { // Convert Objective-C objects to id because we cannot distinguish between // LLVM types for Obj-C classes as they are opaque. 
ArgType = CGM.getContext().getObjCIdType(); ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType)); } Function *F = CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType)); Value *Result = Builder.CreateCall(F, ArgValue); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false); return RValue::get(Result); } case Builtin::BI__builtin_dynamic_object_size: case Builtin::BI__builtin_object_size: { unsigned Type = E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue(); auto *ResType = cast(ConvertType(E->getType())); // We pass this builtin onto the optimizer so that it can figure out the // object size in more complex cases. bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size; return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType, /*EmittedE=*/nullptr, IsDynamic)); } case Builtin::BI__builtin_prefetch: { Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); // FIXME: Technically these constants should of type 'int', yes? RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) : llvm::ConstantInt::get(Int32Ty, 0); Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : llvm::ConstantInt::get(Int32Ty, 3); Value *Data = llvm::ConstantInt::get(Int32Ty, 1); Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); Builder.CreateCall(F, {Address, RW, Locality, Data}); return RValue::get(nullptr); } case Builtin::BI__builtin_readcyclecounter: { Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin_readsteadycounter: { Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter); return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin___clear_cache: { Value *Begin = EmitScalarExpr(E->getArg(0)); Value *End = EmitScalarExpr(E->getArg(1)); Function *F = CGM.getIntrinsic(Intrinsic::clear_cache); return RValue::get(Builder.CreateCall(F, {Begin, End})); } case Builtin::BI__builtin_trap: EmitTrapCall(Intrinsic::trap); return RValue::get(nullptr); case Builtin::BI__builtin_verbose_trap: { llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation(); if (getDebugInfo()) { TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor( TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()), *E->getArg(1)->tryEvaluateString(getContext())); } ApplyDebugLocation ApplyTrapDI(*this, TrapLocation); // Currently no attempt is made to prevent traps from being merged. EmitTrapCall(Intrinsic::trap); return RValue::get(nullptr); } case Builtin::BI__debugbreak: EmitTrapCall(Intrinsic::debugtrap); return RValue::get(nullptr); case Builtin::BI__builtin_unreachable: { EmitUnreachable(E->getExprLoc()); // We do need to preserve an insertion point. EmitBlock(createBasicBlock("unreachable.cont")); return RValue::get(nullptr); } case Builtin::BI__builtin_powi: case Builtin::BI__builtin_powif: case Builtin::BI__builtin_powil: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); if (Builder.getIsFPConstrained()) { // FIXME: llvm.powi has 2 mangling types, // llvm.experimental.constrained.powi has one. 
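// That is, llvm.powi is overloaded on both the value and exponent types
// (e.g. llvm.powi.f64.i32), while the constrained variant is overloaded only
// on the value type, which is why only Src0->getType() is passed to
// getIntrinsic() here but both types are passed for the plain form below.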
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_powi, Src0->getType()); return RValue::get(Builder.CreateConstrainedFPCall(F, { Src0, Src1 })); } Function *F = CGM.getIntrinsic(Intrinsic::powi, { Src0->getType(), Src1->getType() }); return RValue::get(Builder.CreateCall(F, { Src0, Src1 })); } case Builtin::BI__builtin_frexpl: { // Linux PPC will not be adding additional PPCDoubleDouble support. // WIP to switch default to IEEE long double. Will emit libcall for // frexpl instead of legalizing this type in the BE. if (&getTarget().getLongDoubleFormat() == &llvm::APFloat::PPCDoubleDouble()) break; [[fallthrough]]; } case Builtin::BI__builtin_frexp: case Builtin::BI__builtin_frexpf: case Builtin::BI__builtin_frexpf128: case Builtin::BI__builtin_frexpf16: return RValue::get(emitFrexpBuiltin(*this, E, Intrinsic::frexp)); case Builtin::BI__builtin_isgreater: case Builtin::BI__builtin_isgreaterequal: case Builtin::BI__builtin_isless: case Builtin::BI__builtin_islessequal: case Builtin::BI__builtin_islessgreater: case Builtin::BI__builtin_isunordered: { // Ordered comparisons: we know the arguments to these are matching scalar // floating point values. CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); switch (BuiltinID) { default: llvm_unreachable("Unknown ordered comparison"); case Builtin::BI__builtin_isgreater: LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp"); break; case Builtin::BI__builtin_isgreaterequal: LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp"); break; case Builtin::BI__builtin_isless: LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp"); break; case Builtin::BI__builtin_islessequal: LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp"); break; case Builtin::BI__builtin_islessgreater: LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp"); break; case Builtin::BI__builtin_isunordered: LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp"); break; } // ZExt bool to int type. 
return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType()))); } case Builtin::BI__builtin_isnan: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) return RValue::get(Result); return RValue::get( Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNan), ConvertType(E->getType()))); } case Builtin::BI__builtin_issignaling: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); return RValue::get( Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSNan), ConvertType(E->getType()))); } case Builtin::BI__builtin_isinf: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) return RValue::get(Result); return RValue::get( Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcInf), ConvertType(E->getType()))); } case Builtin::BIfinite: case Builtin::BI__finite: case Builtin::BIfinitef: case Builtin::BI__finitef: case Builtin::BIfinitel: case Builtin::BI__finitel: case Builtin::BI__builtin_isfinite: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) return RValue::get(Result); return RValue::get( Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcFinite), ConvertType(E->getType()))); } case Builtin::BI__builtin_isnormal: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); return RValue::get( Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcNormal), ConvertType(E->getType()))); } case Builtin::BI__builtin_issubnormal: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); return RValue::get( Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcSubnormal), ConvertType(E->getType()))); } case Builtin::BI__builtin_iszero: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); return RValue::get( Builder.CreateZExt(Builder.createIsFPClass(V, FPClassTest::fcZero), ConvertType(E->getType()))); } case Builtin::BI__builtin_isfpclass: { Expr::EvalResult Result; if (!E->getArg(1)->EvaluateAsInt(Result, CGM.getContext())) break; uint64_t Test = Result.Val.getInt().getLimitedValue(); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); return RValue::get(Builder.CreateZExt(Builder.createIsFPClass(V, Test), ConvertType(E->getType()))); } case Builtin::BI__builtin_nondeterministic_value: { llvm::Type *Ty = ConvertType(E->getArg(0)->getType()); Value *Result = PoisonValue::get(Ty); Result = Builder.CreateFreeze(Result); return RValue::get(Result); } case Builtin::BI__builtin_elementwise_abs: { Value *Result; QualType QT = E->getArg(0)->getType(); if (auto *VecTy = QT->getAs()) QT = VecTy->getElementType(); if (QT->isIntegerType()) Result = Builder.CreateBinaryIntrinsic( llvm::Intrinsic::abs, EmitScalarExpr(E->getArg(0)), Builder.getFalse(), nullptr, "elt.abs"); else Result = emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::fabs, "elt.abs"); return RValue::get(Result); } case Builtin::BI__builtin_elementwise_acos: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::acos, "elt.acos")); case Builtin::BI__builtin_elementwise_asin: return RValue::get(emitBuiltinWithOneOverloadedType<1>( 
*this, E, llvm::Intrinsic::asin, "elt.asin")); case Builtin::BI__builtin_elementwise_atan: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::atan, "elt.atan")); case Builtin::BI__builtin_elementwise_ceil: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::ceil, "elt.ceil")); case Builtin::BI__builtin_elementwise_exp: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::exp, "elt.exp")); case Builtin::BI__builtin_elementwise_exp2: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::exp2, "elt.exp2")); case Builtin::BI__builtin_elementwise_log: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::log, "elt.log")); case Builtin::BI__builtin_elementwise_log2: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::log2, "elt.log2")); case Builtin::BI__builtin_elementwise_log10: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::log10, "elt.log10")); case Builtin::BI__builtin_elementwise_pow: { return RValue::get( emitBuiltinWithOneOverloadedType<2>(*this, E, llvm::Intrinsic::pow)); } case Builtin::BI__builtin_elementwise_bitreverse: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::bitreverse, "elt.bitreverse")); case Builtin::BI__builtin_elementwise_cos: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::cos, "elt.cos")); case Builtin::BI__builtin_elementwise_cosh: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::cosh, "elt.cosh")); case Builtin::BI__builtin_elementwise_floor: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::floor, "elt.floor")); case Builtin::BI__builtin_elementwise_roundeven: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::roundeven, "elt.roundeven")); case Builtin::BI__builtin_elementwise_round: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::round, "elt.round")); case Builtin::BI__builtin_elementwise_rint: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::rint, "elt.rint")); case Builtin::BI__builtin_elementwise_nearbyint: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::nearbyint, "elt.nearbyint")); case Builtin::BI__builtin_elementwise_sin: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::sin, "elt.sin")); case Builtin::BI__builtin_elementwise_sinh: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::sinh, "elt.sinh")); case Builtin::BI__builtin_elementwise_tan: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::tan, "elt.tan")); case Builtin::BI__builtin_elementwise_tanh: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::tanh, "elt.tanh")); case Builtin::BI__builtin_elementwise_trunc: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::trunc, "elt.trunc")); case Builtin::BI__builtin_elementwise_canonicalize: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::canonicalize, "elt.canonicalize")); case Builtin::BI__builtin_elementwise_copysign: return RValue::get(emitBuiltinWithOneOverloadedType<2>( *this, E, llvm::Intrinsic::copysign)); case Builtin::BI__builtin_elementwise_fma: return RValue::get( 
emitBuiltinWithOneOverloadedType<3>(*this, E, llvm::Intrinsic::fma)); case Builtin::BI__builtin_elementwise_add_sat: case Builtin::BI__builtin_elementwise_sub_sat: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Result; assert(Op0->getType()->isIntOrIntVectorTy() && "integer type expected"); QualType Ty = E->getArg(0)->getType(); if (auto *VecTy = Ty->getAs()) Ty = VecTy->getElementType(); bool IsSigned = Ty->isSignedIntegerType(); unsigned Opc; if (BuiltinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat) Opc = IsSigned ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat; else Opc = IsSigned ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat; Result = Builder.CreateBinaryIntrinsic(Opc, Op0, Op1, nullptr, "elt.sat"); return RValue::get(Result); } case Builtin::BI__builtin_elementwise_max: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Result; if (Op0->getType()->isIntOrIntVectorTy()) { QualType Ty = E->getArg(0)->getType(); if (auto *VecTy = Ty->getAs()) Ty = VecTy->getElementType(); Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType() ? llvm::Intrinsic::smax : llvm::Intrinsic::umax, Op0, Op1, nullptr, "elt.max"); } else Result = Builder.CreateMaxNum(Op0, Op1, "elt.max"); return RValue::get(Result); } case Builtin::BI__builtin_elementwise_min: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Result; if (Op0->getType()->isIntOrIntVectorTy()) { QualType Ty = E->getArg(0)->getType(); if (auto *VecTy = Ty->getAs()) Ty = VecTy->getElementType(); Result = Builder.CreateBinaryIntrinsic(Ty->isSignedIntegerType() ? llvm::Intrinsic::smin : llvm::Intrinsic::umin, Op0, Op1, nullptr, "elt.min"); } else Result = Builder.CreateMinNum(Op0, Op1, "elt.min"); return RValue::get(Result); } case Builtin::BI__builtin_reduce_max: { auto GetIntrinsicID = [this](QualType QT) { if (auto *VecTy = QT->getAs()) QT = VecTy->getElementType(); else if (QT->isSizelessVectorType()) QT = QT->getSizelessVectorEltType(CGM.getContext()); if (QT->isSignedIntegerType()) return llvm::Intrinsic::vector_reduce_smax; if (QT->isUnsignedIntegerType()) return llvm::Intrinsic::vector_reduce_umax; assert(QT->isFloatingType() && "must have a float here"); return llvm::Intrinsic::vector_reduce_fmax; }; return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } case Builtin::BI__builtin_reduce_min: { auto GetIntrinsicID = [this](QualType QT) { if (auto *VecTy = QT->getAs()) QT = VecTy->getElementType(); else if (QT->isSizelessVectorType()) QT = QT->getSizelessVectorEltType(CGM.getContext()); if (QT->isSignedIntegerType()) return llvm::Intrinsic::vector_reduce_smin; if (QT->isUnsignedIntegerType()) return llvm::Intrinsic::vector_reduce_umin; assert(QT->isFloatingType() && "must have a float here"); return llvm::Intrinsic::vector_reduce_fmin; }; return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, GetIntrinsicID(E->getArg(0)->getType()), "rdx.min")); } case Builtin::BI__builtin_reduce_add: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_add, "rdx.add")); case Builtin::BI__builtin_reduce_mul: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_mul, "rdx.mul")); case Builtin::BI__builtin_reduce_xor: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_xor, 
"rdx.xor")); case Builtin::BI__builtin_reduce_or: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_or, "rdx.or")); case Builtin::BI__builtin_reduce_and: return RValue::get(emitBuiltinWithOneOverloadedType<1>( *this, E, llvm::Intrinsic::vector_reduce_and, "rdx.and")); case Builtin::BI__builtin_matrix_transpose: { auto *MatrixTy = E->getArg(0)->getType()->castAs(); Value *MatValue = EmitScalarExpr(E->getArg(0)); MatrixBuilder MB(Builder); Value *Result = MB.CreateMatrixTranspose(MatValue, MatrixTy->getNumRows(), MatrixTy->getNumColumns()); return RValue::get(Result); } case Builtin::BI__builtin_matrix_column_major_load: { MatrixBuilder MB(Builder); // Emit everything that isn't dependent on the first parameter type Value *Stride = EmitScalarExpr(E->getArg(3)); const auto *ResultTy = E->getType()->getAs(); auto *PtrTy = E->getArg(0)->getType()->getAs(); assert(PtrTy && "arg0 must be of pointer type"); bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified(); Address Src = EmitPointerWithAlignment(E->getArg(0)); EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)), E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); Value *Result = MB.CreateColumnMajorLoad( Src.getElementType(), Src.emitRawPointer(*this), Align(Src.getAlignment().getQuantity()), Stride, IsVolatile, ResultTy->getNumRows(), ResultTy->getNumColumns(), "matrix"); return RValue::get(Result); } case Builtin::BI__builtin_matrix_column_major_store: { MatrixBuilder MB(Builder); Value *Matrix = EmitScalarExpr(E->getArg(0)); Address Dst = EmitPointerWithAlignment(E->getArg(1)); Value *Stride = EmitScalarExpr(E->getArg(2)); const auto *MatrixTy = E->getArg(0)->getType()->getAs(); auto *PtrTy = E->getArg(1)->getType()->getAs(); assert(PtrTy && "arg1 must be of pointer type"); bool IsVolatile = PtrTy->getPointeeType().isVolatileQualified(); EmitNonNullArgCheck(RValue::get(Dst.emitRawPointer(*this)), E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD, 0); Value *Result = MB.CreateColumnMajorStore( Matrix, Dst.emitRawPointer(*this), Align(Dst.getAlignment().getQuantity()), Stride, IsVolatile, MatrixTy->getNumRows(), MatrixTy->getNumColumns()); return RValue::get(Result); } case Builtin::BI__builtin_isinf_sign: { // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. 
Value *Arg = EmitScalarExpr(E->getArg(0)); Value *AbsArg = EmitFAbs(*this, Arg); Value *IsInf = Builder.CreateFCmpOEQ( AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); Value *IsNeg = EmitSignBit(*this, Arg); llvm::Type *IntTy = ConvertType(E->getType()); Value *Zero = Constant::getNullValue(IntTy); Value *One = ConstantInt::get(IntTy, 1); Value *NegativeOne = ConstantInt::get(IntTy, -1); Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One); Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero); return RValue::get(Result); } case Builtin::BI__builtin_flt_rounds: { Function *F = CGM.getIntrinsic(Intrinsic::get_rounding); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateCall(F); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, "cast"); return RValue::get(Result); } case Builtin::BI__builtin_set_flt_rounds: { Function *F = CGM.getIntrinsic(Intrinsic::set_rounding); Value *V = EmitScalarExpr(E->getArg(0)); Builder.CreateCall(F, V); return RValue::get(nullptr); } case Builtin::BI__builtin_fpclassify: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. Value *V = EmitScalarExpr(E->getArg(5)); llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); // Create Result BasicBlock *Begin = Builder.GetInsertBlock(); BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn); Builder.SetInsertPoint(End); PHINode *Result = Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4, "fpclassify_result"); // if (V==0) return FP_ZERO Builder.SetInsertPoint(Begin); Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty), "iszero"); Value *ZeroLiteral = EmitScalarExpr(E->getArg(4)); BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn); Builder.CreateCondBr(IsZero, End, NotZero); Result->addIncoming(ZeroLiteral, Begin); // if (V != V) return FP_NAN Builder.SetInsertPoint(NotZero); Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp"); Value *NanLiteral = EmitScalarExpr(E->getArg(0)); BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); Builder.CreateCondBr(IsNan, End, NotNan); Result->addIncoming(NanLiteral, NotZero); // if (fabs(V) == infinity) return FP_INFINITY Builder.SetInsertPoint(NotNan); Value *VAbs = EmitFAbs(*this, V); Value *IsInf = Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), "isinf"); Value *InfLiteral = EmitScalarExpr(E->getArg(1)); BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); Builder.CreateCondBr(IsInf, End, NotInf); Result->addIncoming(InfLiteral, NotNan); // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL Builder.SetInsertPoint(NotInf); APFloat Smallest = APFloat::getSmallestNormalized( getContext().getFloatTypeSemantics(E->getArg(5)->getType())); Value *IsNormal = Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), "isnormal"); Value *NormalResult = Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))); Builder.CreateBr(End); Result->addIncoming(NormalResult, NotInf); // return Result Builder.SetInsertPoint(End); return RValue::get(Result); } // An alloca will always return a pointer to the alloca (stack) address // space. This address space need not be the same as the AST / Language // default (e.g. in C / C++ auto vars are in the generic address space). 
At // the AST level this is handled within CreateTempAlloca et al., but for the // builtin / dynamic alloca we have to handle it here. We use an explicit cast // instead of passing an AS to CreateAlloca so as to not inhibit optimisation. case Builtin::BIalloca: case Builtin::BI_alloca: case Builtin::BI__builtin_alloca_uninitialized: case Builtin::BI__builtin_alloca: { Value *Size = EmitScalarExpr(E->getArg(0)); const TargetInfo &TI = getContext().getTargetInfo(); // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__. const Align SuitableAlignmentInBytes = CGM.getContext() .toCharUnitsFromBits(TI.getSuitableAlign()) .getAsAlign(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(SuitableAlignmentInBytes); if (BuiltinID != Builtin::BI__builtin_alloca_uninitialized) initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes); LangAS AAS = getASTAllocaAddressSpace(); LangAS EAS = E->getType()->getPointeeType().getAddressSpace(); if (AAS != EAS) { llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType()); return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS, EAS, Ty)); } return RValue::get(AI); } case Builtin::BI__builtin_alloca_with_align_uninitialized: case Builtin::BI__builtin_alloca_with_align: { Value *Size = EmitScalarExpr(E->getArg(0)); Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1)); auto *AlignmentInBitsCI = cast(AlignmentInBitsValue); unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue(); const Align AlignmentInBytes = CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getAsAlign(); AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size); AI->setAlignment(AlignmentInBytes); if (BuiltinID != Builtin::BI__builtin_alloca_with_align_uninitialized) initializeAlloca(*this, AI, Size, AlignmentInBytes); LangAS AAS = getASTAllocaAddressSpace(); LangAS EAS = E->getType()->getPointeeType().getAddressSpace(); if (AAS != EAS) { llvm::Type *Ty = CGM.getTypes().ConvertType(E->getType()); return RValue::get(getTargetHooks().performAddrSpaceCast(*this, AI, AAS, EAS, Ty)); } return RValue::get(AI); } case Builtin::BIbzero: case Builtin::BI__builtin_bzero: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); Value *SizeVal = EmitScalarExpr(E->getArg(1)); EmitNonNullArgCheck(Dest, E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); return RValue::get(nullptr); } case Builtin::BIbcopy: case Builtin::BI__builtin_bcopy: { Address Src = EmitPointerWithAlignment(E->getArg(0)); Address Dest = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); EmitNonNullArgCheck(RValue::get(Src.emitRawPointer(*this)), E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)), E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD, 0); Builder.CreateMemMove(Dest, Src, SizeVal, false); return RValue::get(nullptr); } case Builtin::BImemcpy: case Builtin::BI__builtin_memcpy: case Builtin::BImempcpy: case Builtin::BI__builtin_mempcpy: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0); EmitArgCheck(TCK_Load, Src, E->getArg(1), 1); Builder.CreateMemCpy(Dest, Src, SizeVal, false); if (BuiltinID == Builtin::BImempcpy || BuiltinID == Builtin::BI__builtin_mempcpy) return RValue::get(Builder.CreateInBoundsGEP( 
Dest.getElementType(), Dest.emitRawPointer(*this), SizeVal)); else return RValue::get(Dest, *this); } case Builtin::BI__builtin_memcpy_inline: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); uint64_t Size = E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue(); EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0); EmitArgCheck(TCK_Load, Src, E->getArg(1), 1); Builder.CreateMemCpyInline(Dest, Src, Size); return RValue::get(nullptr); } case Builtin::BI__builtin_char_memchr: BuiltinID = Builtin::BI__builtin_memchr; break; case Builtin::BI__builtin___memcpy_chk: { // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. Expr::EvalResult SizeResult, DstSizeResult; if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; llvm::APSInt Size = SizeResult.Val.getInt(); llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); Builder.CreateMemCpy(Dest, Src, SizeVal, false); return RValue::get(Dest, *this); } case Builtin::BI__builtin_objc_memmove_collectable: { Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, DestAddr, SrcAddr, SizeVal); return RValue::get(DestAddr, *this); } case Builtin::BI__builtin___memmove_chk: { // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. Expr::EvalResult SizeResult, DstSizeResult; if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; llvm::APSInt Size = SizeResult.Val.getInt(); llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); Builder.CreateMemMove(Dest, Src, SizeVal, false); return RValue::get(Dest, *this); } case Builtin::BImemmove: case Builtin::BI__builtin_memmove: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); EmitArgCheck(TCK_Store, Dest, E->getArg(0), 0); EmitArgCheck(TCK_Load, Src, E->getArg(1), 1); Builder.CreateMemMove(Dest, Src, SizeVal, false); return RValue::get(Dest, *this); } case Builtin::BImemset: case Builtin::BI__builtin_memset: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); Value *SizeVal = EmitScalarExpr(E->getArg(2)); EmitNonNullArgCheck(Dest, E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); return RValue::get(Dest, *this); } case Builtin::BI__builtin_memset_inline: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); uint64_t Size = E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue(); EmitNonNullArgCheck(RValue::get(Dest.emitRawPointer(*this)), E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); Builder.CreateMemSetInline(Dest, 
ByteVal, Size); return RValue::get(nullptr); } case Builtin::BI__builtin___memset_chk: { // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. Expr::EvalResult SizeResult, DstSizeResult; if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext())) break; llvm::APSInt Size = SizeResult.Val.getInt(); llvm::APSInt DstSize = DstSizeResult.Val.getInt(); if (Size.ugt(DstSize)) break; Address Dest = EmitPointerWithAlignment(E->getArg(0)); Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); return RValue::get(Dest, *this); } case Builtin::BI__builtin_wmemchr: { // The MSVC runtime library does not provide a definition of wmemchr, so we // need an inline implementation. if (!getTarget().getTriple().isOSMSVCRT()) break; llvm::Type *WCharTy = ConvertType(getContext().WCharTy); Value *Str = EmitScalarExpr(E->getArg(0)); Value *Chr = EmitScalarExpr(E->getArg(1)); Value *Size = EmitScalarExpr(E->getArg(2)); BasicBlock *Entry = Builder.GetInsertBlock(); BasicBlock *CmpEq = createBasicBlock("wmemchr.eq"); BasicBlock *Next = createBasicBlock("wmemchr.next"); BasicBlock *Exit = createBasicBlock("wmemchr.exit"); Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0)); Builder.CreateCondBr(SizeEq0, Exit, CmpEq); EmitBlock(CmpEq); PHINode *StrPhi = Builder.CreatePHI(Str->getType(), 2); StrPhi->addIncoming(Str, Entry); PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2); SizePhi->addIncoming(Size, Entry); CharUnits WCharAlign = getContext().getTypeAlignInChars(getContext().WCharTy); Value *StrCh = Builder.CreateAlignedLoad(WCharTy, StrPhi, WCharAlign); Value *FoundChr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 0); Value *StrEqChr = Builder.CreateICmpEQ(StrCh, Chr); Builder.CreateCondBr(StrEqChr, Exit, Next); EmitBlock(Next); Value *NextStr = Builder.CreateConstInBoundsGEP1_32(WCharTy, StrPhi, 1); Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1)); Value *NextSizeEq0 = Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0)); Builder.CreateCondBr(NextSizeEq0, Exit, CmpEq); StrPhi->addIncoming(NextStr, Next); SizePhi->addIncoming(NextSize, Next); EmitBlock(Exit); PHINode *Ret = Builder.CreatePHI(Str->getType(), 3); Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Entry); Ret->addIncoming(llvm::Constant::getNullValue(Str->getType()), Next); Ret->addIncoming(FoundChr, CmpEq); return RValue::get(Ret); } case Builtin::BI__builtin_wmemcmp: { // The MSVC runtime library does not provide a definition of wmemcmp, so we // need an inline implementation. 
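    // The basic blocks built below implement, in IR form, roughly the
    // following C loop (a sketch only; wchar_t elements are compared as
    // unsigned values, matching the icmp ugt/ult emitted further down):
    //
    //   while (n != 0) {
    //     if (*dst > *src) return 1;
    //     if (*dst < *src) return -1;
    //     ++dst; ++src; --n;
    //   }
    //   return 0;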
if (!getTarget().getTriple().isOSMSVCRT()) break; llvm::Type *WCharTy = ConvertType(getContext().WCharTy); Value *Dst = EmitScalarExpr(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Size = EmitScalarExpr(E->getArg(2)); BasicBlock *Entry = Builder.GetInsertBlock(); BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt"); BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt"); BasicBlock *Next = createBasicBlock("wmemcmp.next"); BasicBlock *Exit = createBasicBlock("wmemcmp.exit"); Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0)); Builder.CreateCondBr(SizeEq0, Exit, CmpGT); EmitBlock(CmpGT); PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2); DstPhi->addIncoming(Dst, Entry); PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2); SrcPhi->addIncoming(Src, Entry); PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2); SizePhi->addIncoming(Size, Entry); CharUnits WCharAlign = getContext().getTypeAlignInChars(getContext().WCharTy); Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign); Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign); Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh); Builder.CreateCondBr(DstGtSrc, Exit, CmpLT); EmitBlock(CmpLT); Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh); Builder.CreateCondBr(DstLtSrc, Exit, Next); EmitBlock(Next); Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1); Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1); Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1)); Value *NextSizeEq0 = Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0)); Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT); DstPhi->addIncoming(NextDst, Next); SrcPhi->addIncoming(NextSrc, Next); SizePhi->addIncoming(NextSize, Next); EmitBlock(Exit); PHINode *Ret = Builder.CreatePHI(IntTy, 4); Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry); Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT); Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT); Ret->addIncoming(ConstantInt::get(IntTy, 0), Next); return RValue::get(Ret); } case Builtin::BI__builtin_dwarf_cfa: { // The offset in bytes from the first argument to the CFA. // // Why on earth is this in the frontend? Is there any reason at // all that the backend can't reasonably determine this while // lowering llvm.eh.dwarf.cfa()? // // TODO: If there's a satisfactory reason, add a target hook for // this instead of hard-coding 0, which is correct for most targets. 
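    // With the hard-coded offset this lowers to a single intrinsic call,
    // roughly: call ptr @llvm.eh.dwarf.cfa(i32 0).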
int32_t Offset = 0; Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa); return RValue::get(Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, Offset))); } case Builtin::BI__builtin_return_address: { Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI_ReturnAddress: { Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); } case Builtin::BI__builtin_frame_address: { Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI__builtin_extract_return_addr: { Value *Address = EmitScalarExpr(E->getArg(0)); Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); return RValue::get(Result); } case Builtin::BI__builtin_frob_return_addr: { Value *Address = EmitScalarExpr(E->getArg(0)); Value *Result = getTargetHooks().encodeReturnAddress(*this, Address); return RValue::get(Result); } case Builtin::BI__builtin_dwarf_sp_column: { llvm::IntegerType *Ty = cast(ConvertType(E->getType())); int Column = getTargetHooks().getDwarfEHStackPointer(CGM); if (Column == -1) { CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); return RValue::get(llvm::UndefValue::get(Ty)); } return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); } case Builtin::BI__builtin_init_dwarf_reg_size_table: { Value *Address = EmitScalarExpr(E->getArg(0)); if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); } case Builtin::BI__builtin_eh_return: { Value *Int = EmitScalarExpr(E->getArg(0)); Value *Ptr = EmitScalarExpr(E->getArg(1)); llvm::IntegerType *IntTy = cast(Int->getType()); assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) && "LLVM's __builtin_eh_return only supports 32- and 64-bit variants"); Function *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32 : Intrinsic::eh_return_i64); Builder.CreateCall(F, {Int, Ptr}); Builder.CreateUnreachable(); // We do need to preserve an insertion point. EmitBlock(createBasicBlock("builtin_eh_return.cont")); return RValue::get(nullptr); } case Builtin::BI__builtin_unwind_init: { Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init); Builder.CreateCall(F); return RValue::get(nullptr); } case Builtin::BI__builtin_extend_pointer: { // Extends a pointer to the size of an _Unwind_Word, which is // uint64_t on all platforms. Generally this gets poked into a // register and eventually used as an address, so if the // addressing registers are wider than pointers and the platform // doesn't implicitly ignore high-order bits when doing // addressing, we need to make sure we zext / sext based on // the platform's expectations. // // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html // Cast the pointer to intptr_t. Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); // If that's 64 bits, we're done. if (IntPtrTy->getBitWidth() == 64) return RValue::get(Result); // Otherwise, ask the codegen data what to do. 
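    // On a 32-bit target this becomes either
    //   %extend.sext = sext i32 %extend.cast to i64
    // or
    //   %extend.zext = zext i32 %extend.cast to i64
    // depending on the target hook queried below.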
if (getTargetHooks().extendPointerWithSExt()) return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext")); else return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext")); } case Builtin::BI__builtin_setjmp: { // Buffer is a void**. Address Buf = EmitPointerWithAlignment(E->getArg(0)); // Store the frame pointer to the setjmp buffer. Value *FrameAddr = Builder.CreateCall( CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy), ConstantInt::get(Int32Ty, 0)); Builder.CreateStore(FrameAddr, Buf); // Store the stack pointer to the setjmp buffer. Value *StackAddr = Builder.CreateStackSave(); assert(Buf.emitRawPointer(*this)->getType() == StackAddr->getType()); Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2); Builder.CreateStore(StackAddr, StackSaveSlot); // Call LLVM's EH setjmp, which is lightweight. Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp); return RValue::get(Builder.CreateCall(F, Buf.emitRawPointer(*this))); } case Builtin::BI__builtin_longjmp: { Value *Buf = EmitScalarExpr(E->getArg(0)); // Call LLVM's EH longjmp, which is lightweight. Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf); // longjmp doesn't return; mark this as unreachable. Builder.CreateUnreachable(); // We do need to preserve an insertion point. EmitBlock(createBasicBlock("longjmp.cont")); return RValue::get(nullptr); } case Builtin::BI__builtin_launder: { const Expr *Arg = E->getArg(0); QualType ArgTy = Arg->getType()->getPointeeType(); Value *Ptr = EmitScalarExpr(Arg); if (TypeRequiresBuiltinLaunder(CGM, ArgTy)) Ptr = Builder.CreateLaunderInvariantGroup(Ptr); return RValue::get(Ptr); } case Builtin::BI__sync_fetch_and_add: case Builtin::BI__sync_fetch_and_sub: case Builtin::BI__sync_fetch_and_or: case Builtin::BI__sync_fetch_and_and: case Builtin::BI__sync_fetch_and_xor: case Builtin::BI__sync_fetch_and_nand: case Builtin::BI__sync_add_and_fetch: case Builtin::BI__sync_sub_and_fetch: case Builtin::BI__sync_and_and_fetch: case Builtin::BI__sync_or_and_fetch: case Builtin::BI__sync_xor_and_fetch: case Builtin::BI__sync_nand_and_fetch: case Builtin::BI__sync_val_compare_and_swap: case Builtin::BI__sync_bool_compare_and_swap: case Builtin::BI__sync_lock_test_and_set: case Builtin::BI__sync_lock_release: case Builtin::BI__sync_swap: llvm_unreachable("Shouldn't make it through sema"); case Builtin::BI__sync_fetch_and_add_1: case Builtin::BI__sync_fetch_and_add_2: case Builtin::BI__sync_fetch_and_add_4: case Builtin::BI__sync_fetch_and_add_8: case Builtin::BI__sync_fetch_and_add_16: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E); case Builtin::BI__sync_fetch_and_sub_1: case Builtin::BI__sync_fetch_and_sub_2: case Builtin::BI__sync_fetch_and_sub_4: case Builtin::BI__sync_fetch_and_sub_8: case Builtin::BI__sync_fetch_and_sub_16: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E); case Builtin::BI__sync_fetch_and_or_1: case Builtin::BI__sync_fetch_and_or_2: case Builtin::BI__sync_fetch_and_or_4: case Builtin::BI__sync_fetch_and_or_8: case Builtin::BI__sync_fetch_and_or_16: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E); case Builtin::BI__sync_fetch_and_and_1: case Builtin::BI__sync_fetch_and_and_2: case Builtin::BI__sync_fetch_and_and_4: case Builtin::BI__sync_fetch_and_and_8: case Builtin::BI__sync_fetch_and_and_16: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E); case Builtin::BI__sync_fetch_and_xor_1: case Builtin::BI__sync_fetch_and_xor_2: case Builtin::BI__sync_fetch_and_xor_4: case 
Builtin::BI__sync_fetch_and_xor_8: case Builtin::BI__sync_fetch_and_xor_16: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E); case Builtin::BI__sync_fetch_and_nand_1: case Builtin::BI__sync_fetch_and_nand_2: case Builtin::BI__sync_fetch_and_nand_4: case Builtin::BI__sync_fetch_and_nand_8: case Builtin::BI__sync_fetch_and_nand_16: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E); // Clang extensions: not overloaded yet. case Builtin::BI__sync_fetch_and_min: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E); case Builtin::BI__sync_fetch_and_max: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E); case Builtin::BI__sync_fetch_and_umin: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E); case Builtin::BI__sync_fetch_and_umax: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E); case Builtin::BI__sync_add_and_fetch_1: case Builtin::BI__sync_add_and_fetch_2: case Builtin::BI__sync_add_and_fetch_4: case Builtin::BI__sync_add_and_fetch_8: case Builtin::BI__sync_add_and_fetch_16: return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E, llvm::Instruction::Add); case Builtin::BI__sync_sub_and_fetch_1: case Builtin::BI__sync_sub_and_fetch_2: case Builtin::BI__sync_sub_and_fetch_4: case Builtin::BI__sync_sub_and_fetch_8: case Builtin::BI__sync_sub_and_fetch_16: return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E, llvm::Instruction::Sub); case Builtin::BI__sync_and_and_fetch_1: case Builtin::BI__sync_and_and_fetch_2: case Builtin::BI__sync_and_and_fetch_4: case Builtin::BI__sync_and_and_fetch_8: case Builtin::BI__sync_and_and_fetch_16: return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E, llvm::Instruction::And); case Builtin::BI__sync_or_and_fetch_1: case Builtin::BI__sync_or_and_fetch_2: case Builtin::BI__sync_or_and_fetch_4: case Builtin::BI__sync_or_and_fetch_8: case Builtin::BI__sync_or_and_fetch_16: return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E, llvm::Instruction::Or); case Builtin::BI__sync_xor_and_fetch_1: case Builtin::BI__sync_xor_and_fetch_2: case Builtin::BI__sync_xor_and_fetch_4: case Builtin::BI__sync_xor_and_fetch_8: case Builtin::BI__sync_xor_and_fetch_16: return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E, llvm::Instruction::Xor); case Builtin::BI__sync_nand_and_fetch_1: case Builtin::BI__sync_nand_and_fetch_2: case Builtin::BI__sync_nand_and_fetch_4: case Builtin::BI__sync_nand_and_fetch_8: case Builtin::BI__sync_nand_and_fetch_16: return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E, llvm::Instruction::And, true); case Builtin::BI__sync_val_compare_and_swap_1: case Builtin::BI__sync_val_compare_and_swap_2: case Builtin::BI__sync_val_compare_and_swap_4: case Builtin::BI__sync_val_compare_and_swap_8: case Builtin::BI__sync_val_compare_and_swap_16: return RValue::get(MakeAtomicCmpXchgValue(*this, E, false)); case Builtin::BI__sync_bool_compare_and_swap_1: case Builtin::BI__sync_bool_compare_and_swap_2: case Builtin::BI__sync_bool_compare_and_swap_4: case Builtin::BI__sync_bool_compare_and_swap_8: case Builtin::BI__sync_bool_compare_and_swap_16: return RValue::get(MakeAtomicCmpXchgValue(*this, E, true)); case Builtin::BI__sync_swap_1: case Builtin::BI__sync_swap_2: case Builtin::BI__sync_swap_4: case Builtin::BI__sync_swap_8: case Builtin::BI__sync_swap_16: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); case Builtin::BI__sync_lock_test_and_set_1: case Builtin::BI__sync_lock_test_and_set_2: case Builtin::BI__sync_lock_test_and_set_4: case 
Builtin::BI__sync_lock_test_and_set_8: case Builtin::BI__sync_lock_test_and_set_16: return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); case Builtin::BI__sync_lock_release_1: case Builtin::BI__sync_lock_release_2: case Builtin::BI__sync_lock_release_4: case Builtin::BI__sync_lock_release_8: case Builtin::BI__sync_lock_release_16: { Address Ptr = CheckAtomicAlignment(*this, E); QualType ElTy = E->getArg(0)->getType()->getPointeeType(); llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(ElTy)); llvm::StoreInst *Store = Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr); Store->setAtomic(llvm::AtomicOrdering::Release); return RValue::get(nullptr); } case Builtin::BI__sync_synchronize: { // We assume this is supposed to correspond to a C++0x-style // sequentially-consistent fence (i.e. this is only usable for // synchronization, not device I/O or anything like that). This intrinsic // is really badly designed in the sense that in theory, there isn't // any way to safely use it... but in practice, it mostly works // to use it with non-atomic loads and stores to get acquire/release // semantics. Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent); return RValue::get(nullptr); } case Builtin::BI__builtin_nontemporal_load: return RValue::get(EmitNontemporalLoad(*this, E)); case Builtin::BI__builtin_nontemporal_store: return RValue::get(EmitNontemporalStore(*this, E)); case Builtin::BI__c11_atomic_is_lock_free: case Builtin::BI__atomic_is_lock_free: { // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since // _Atomic(T) is always properly-aligned. const char *LibCallName = "__atomic_is_lock_free"; CallArgList Args; Args.add(RValue::get(EmitScalarExpr(E->getArg(0))), getContext().getSizeType()); if (BuiltinID == Builtin::BI__atomic_is_lock_free) Args.add(RValue::get(EmitScalarExpr(E->getArg(1))), getContext().VoidPtrTy); else Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)), getContext().VoidPtrTy); const CGFunctionInfo &FuncInfo = CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName); return EmitCall(FuncInfo, CGCallee::forDirect(Func), ReturnValueSlot(), Args); } case Builtin::BI__atomic_test_and_set: { // Look at the argument type to determine whether this is a volatile // operation. The parameter type is always volatile. 
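    // When the ordering argument is a compile-time constant, the code below
    // maps the C11 memory_order values directly onto LLVM orderings:
    //   relaxed (0) -> monotonic, consume/acquire (1,2) -> acquire,
    //   release (3) -> release, acq_rel (4) -> acq_rel, seq_cst (5) -> seq_cst.
    // A non-constant ordering falls through to the switch-over-orderings
    // path further down.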
QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); bool Volatile = PtrTy->castAs()->getPointeeType().isVolatileQualified(); Address Ptr = EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty); Value *NewVal = Builder.getInt8(1); Value *Order = EmitScalarExpr(E->getArg(1)); if (isa(Order)) { int ord = cast(Order)->getZExtValue(); AtomicRMWInst *Result = nullptr; switch (ord) { case 0: // memory_order_relaxed default: // invalid order Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, llvm::AtomicOrdering::Monotonic); break; case 1: // memory_order_consume case 2: // memory_order_acquire Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, llvm::AtomicOrdering::Acquire); break; case 3: // memory_order_release Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, llvm::AtomicOrdering::Release); break; case 4: // memory_order_acq_rel Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, llvm::AtomicOrdering::AcquireRelease); break; case 5: // memory_order_seq_cst Result = Builder.CreateAtomicRMW( llvm::AtomicRMWInst::Xchg, Ptr, NewVal, llvm::AtomicOrdering::SequentiallyConsistent); break; } Result->setVolatile(Volatile); return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); } llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); llvm::BasicBlock *BBs[5] = { createBasicBlock("monotonic", CurFn), createBasicBlock("acquire", CurFn), createBasicBlock("release", CurFn), createBasicBlock("acqrel", CurFn), createBasicBlock("seqcst", CurFn) }; llvm::AtomicOrdering Orders[5] = { llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, llvm::AtomicOrdering::SequentiallyConsistent}; Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); Builder.SetInsertPoint(ContBB); PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); for (unsigned i = 0; i < 5; ++i) { Builder.SetInsertPoint(BBs[i]); AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, Orders[i]); RMW->setVolatile(Volatile); Result->addIncoming(RMW, BBs[i]); Builder.CreateBr(ContBB); } SI->addCase(Builder.getInt32(0), BBs[0]); SI->addCase(Builder.getInt32(1), BBs[1]); SI->addCase(Builder.getInt32(2), BBs[1]); SI->addCase(Builder.getInt32(3), BBs[2]); SI->addCase(Builder.getInt32(4), BBs[3]); SI->addCase(Builder.getInt32(5), BBs[4]); Builder.SetInsertPoint(ContBB); return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); } case Builtin::BI__atomic_clear: { QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); bool Volatile = PtrTy->castAs()->getPointeeType().isVolatileQualified(); Address Ptr = EmitPointerWithAlignment(E->getArg(0)); Ptr = Ptr.withElementType(Int8Ty); Value *NewVal = Builder.getInt8(0); Value *Order = EmitScalarExpr(E->getArg(1)); if (isa(Order)) { int ord = cast(Order)->getZExtValue(); StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); switch (ord) { case 0: // memory_order_relaxed default: // invalid order Store->setOrdering(llvm::AtomicOrdering::Monotonic); break; case 3: // memory_order_release Store->setOrdering(llvm::AtomicOrdering::Release); break; case 5: // memory_order_seq_cst Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); break; } return RValue::get(nullptr); } llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); llvm::BasicBlock *BBs[3] = { 
createBasicBlock("monotonic", CurFn), createBasicBlock("release", CurFn), createBasicBlock("seqcst", CurFn) }; llvm::AtomicOrdering Orders[3] = { llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, llvm::AtomicOrdering::SequentiallyConsistent}; Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); for (unsigned i = 0; i < 3; ++i) { Builder.SetInsertPoint(BBs[i]); StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); Store->setOrdering(Orders[i]); Builder.CreateBr(ContBB); } SI->addCase(Builder.getInt32(0), BBs[0]); SI->addCase(Builder.getInt32(3), BBs[1]); SI->addCase(Builder.getInt32(5), BBs[2]); Builder.SetInsertPoint(ContBB); return RValue::get(nullptr); } case Builtin::BI__atomic_thread_fence: case Builtin::BI__atomic_signal_fence: case Builtin::BI__c11_atomic_thread_fence: case Builtin::BI__c11_atomic_signal_fence: { llvm::SyncScope::ID SSID; if (BuiltinID == Builtin::BI__atomic_signal_fence || BuiltinID == Builtin::BI__c11_atomic_signal_fence) SSID = llvm::SyncScope::SingleThread; else SSID = llvm::SyncScope::System; Value *Order = EmitScalarExpr(E->getArg(0)); if (isa(Order)) { int ord = cast(Order)->getZExtValue(); switch (ord) { case 0: // memory_order_relaxed default: // invalid order break; case 1: // memory_order_consume case 2: // memory_order_acquire Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); break; case 3: // memory_order_release Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); break; case 4: // memory_order_acq_rel Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); break; case 5: // memory_order_seq_cst Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); break; } return RValue::get(nullptr); } llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; AcquireBB = createBasicBlock("acquire", CurFn); ReleaseBB = createBasicBlock("release", CurFn); AcqRelBB = createBasicBlock("acqrel", CurFn); SeqCstBB = createBasicBlock("seqcst", CurFn); llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); Builder.SetInsertPoint(AcquireBB); Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(1), AcquireBB); SI->addCase(Builder.getInt32(2), AcquireBB); Builder.SetInsertPoint(ReleaseBB); Builder.CreateFence(llvm::AtomicOrdering::Release, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(3), ReleaseBB); Builder.SetInsertPoint(AcqRelBB); Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(4), AcqRelBB); Builder.SetInsertPoint(SeqCstBB); Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID); Builder.CreateBr(ContBB); SI->addCase(Builder.getInt32(5), SeqCstBB); Builder.SetInsertPoint(ContBB); return RValue::get(nullptr); } case Builtin::BI__builtin_signbit: case Builtin::BI__builtin_signbitf: case Builtin::BI__builtin_signbitl: { return RValue::get( Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), ConvertType(E->getType()))); } case Builtin::BI__warn_memset_zero_len: return RValue::getIgnored(); case Builtin::BI__annotation: { // Re-encode each wide string to UTF8 and make an MDString. 
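    // The net effect is a single call of the form
    //   call void @llvm.codeview.annotation(metadata !n)
    // where !n is an MDTuple holding one MDString per argument, each
    // re-encoded to UTF-8 below.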
SmallVector Strings; for (const Expr *Arg : E->arguments()) { const auto *Str = cast(Arg->IgnoreParenCasts()); assert(Str->getCharByteWidth() == 2); StringRef WideBytes = Str->getBytes(); std::string StrUtf8; if (!convertUTF16ToUTF8String( ArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) { CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument"); continue; } Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8)); } // Build and MDTuple of MDStrings and emit the intrinsic call. llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {}); MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings); Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple)); return RValue::getIgnored(); } case Builtin::BI__builtin_annotation: { llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, {AnnVal->getType(), CGM.ConstGlobalsPtrTy}); // Get the annotation string, go through casts. Sema requires this to be a // non-wide string literal, potentially casted, so the cast<> is safe. const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); StringRef Str = cast(AnnotationStrExpr)->getString(); return RValue::get( EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc(), nullptr)); } case Builtin::BI__builtin_addcb: case Builtin::BI__builtin_addcs: case Builtin::BI__builtin_addc: case Builtin::BI__builtin_addcl: case Builtin::BI__builtin_addcll: case Builtin::BI__builtin_subcb: case Builtin::BI__builtin_subcs: case Builtin::BI__builtin_subc: case Builtin::BI__builtin_subcl: case Builtin::BI__builtin_subcll: { // We translate all of these builtins from expressions of the form: // int x = ..., y = ..., carryin = ..., carryout, result; // result = __builtin_addc(x, y, carryin, &carryout); // // to LLVM IR of the form: // // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 // %carry1 = extractvalue {i32, i1} %tmp1, 1 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, // i32 %carryin) // %result = extractvalue {i32, i1} %tmp2, 0 // %carry2 = extractvalue {i32, i1} %tmp2, 1 // %tmp3 = or i1 %carry1, %carry2 // %tmp4 = zext i1 %tmp3 to i32 // store i32 %tmp4, i32* %carryout // Scalarize our inputs. llvm::Value *X = EmitScalarExpr(E->getArg(0)); llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. llvm::Intrinsic::ID IntrinsicId; switch (BuiltinID) { default: llvm_unreachable("Unknown multiprecision builtin id."); case Builtin::BI__builtin_addcb: case Builtin::BI__builtin_addcs: case Builtin::BI__builtin_addc: case Builtin::BI__builtin_addcl: case Builtin::BI__builtin_addcll: IntrinsicId = llvm::Intrinsic::uadd_with_overflow; break; case Builtin::BI__builtin_subcb: case Builtin::BI__builtin_subcs: case Builtin::BI__builtin_subc: case Builtin::BI__builtin_subcl: case Builtin::BI__builtin_subcll: IntrinsicId = llvm::Intrinsic::usub_with_overflow; break; } // Construct our resulting LLVM IR expression. 
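    // EmitOverflowIntrinsic returns the value element of the {iN, i1} result
    // pair and reports the overflow bit through its last parameter, so the
    // two calls below correspond to %tmp1 and %tmp2 in the scheme sketched
    // above.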
llvm::Value *Carry1; llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry1); llvm::Value *Carry2; llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, Sum1, Carryin, Carry2); llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), X->getType()); Builder.CreateStore(CarryOut, CarryOutPtr); return RValue::get(Sum2); } case Builtin::BI__builtin_add_overflow: case Builtin::BI__builtin_sub_overflow: case Builtin::BI__builtin_mul_overflow: { const clang::Expr *LeftArg = E->getArg(0); const clang::Expr *RightArg = E->getArg(1); const clang::Expr *ResultArg = E->getArg(2); clang::QualType ResultQTy = ResultArg->getType()->castAs()->getPointeeType(); WidthAndSignedness LeftInfo = getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); WidthAndSignedness RightInfo = getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); WidthAndSignedness ResultInfo = getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); // Handle mixed-sign multiplication as a special case, because adding // runtime or backend support for our generic irgen would be too expensive. if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo)) return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy, ResultInfo); if (isSpecialUnsignedMultiplySignedResult(BuiltinID, LeftInfo, RightInfo, ResultInfo)) return EmitCheckedUnsignedMultiplySignedResult( *this, LeftArg, LeftInfo, RightArg, RightInfo, ResultArg, ResultQTy, ResultInfo); WidthAndSignedness EncompassingInfo = EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); llvm::Type *EncompassingLLVMTy = llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); llvm::Intrinsic::ID IntrinsicId; switch (BuiltinID) { default: llvm_unreachable("Unknown overflow builtin id."); case Builtin::BI__builtin_add_overflow: IntrinsicId = EncompassingInfo.Signed ? llvm::Intrinsic::sadd_with_overflow : llvm::Intrinsic::uadd_with_overflow; break; case Builtin::BI__builtin_sub_overflow: IntrinsicId = EncompassingInfo.Signed ? llvm::Intrinsic::ssub_with_overflow : llvm::Intrinsic::usub_with_overflow; break; case Builtin::BI__builtin_mul_overflow: IntrinsicId = EncompassingInfo.Signed ? llvm::Intrinsic::smul_with_overflow : llvm::Intrinsic::umul_with_overflow; break; } llvm::Value *Left = EmitScalarExpr(LeftArg); llvm::Value *Right = EmitScalarExpr(RightArg); Address ResultPtr = EmitPointerWithAlignment(ResultArg); // Extend each operand to the encompassing type. Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); // Perform the operation on the extended values. llvm::Value *Overflow, *Result; Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); if (EncompassingInfo.Width > ResultInfo.Width) { // The encompassing type is wider than the result type, so we need to // truncate it. llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); // To see if the truncation caused an overflow, we will extend // the result and then compare it to the original result. 
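    // For example, on a typical 64-bit target __builtin_add_overflow with an
    // 'unsigned long long' and an 'int' operand and an 'int *' result
    // performs the addition in a signed 65-bit encompassing type; the
    // round-trip comparison below then flags results that do not fit the
    // 32-bit destination.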
llvm::Value *ResultTruncExt = Builder.CreateIntCast( ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); llvm::Value *TruncationOverflow = Builder.CreateICmpNE(Result, ResultTruncExt); Overflow = Builder.CreateOr(Overflow, TruncationOverflow); Result = ResultTrunc; } // Finally, store the result using the pointer. bool isVolatile = ResultArg->getType()->getPointeeType().isVolatileQualified(); Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); return RValue::get(Overflow); } case Builtin::BI__builtin_uadd_overflow: case Builtin::BI__builtin_uaddl_overflow: case Builtin::BI__builtin_uaddll_overflow: case Builtin::BI__builtin_usub_overflow: case Builtin::BI__builtin_usubl_overflow: case Builtin::BI__builtin_usubll_overflow: case Builtin::BI__builtin_umul_overflow: case Builtin::BI__builtin_umull_overflow: case Builtin::BI__builtin_umulll_overflow: case Builtin::BI__builtin_sadd_overflow: case Builtin::BI__builtin_saddl_overflow: case Builtin::BI__builtin_saddll_overflow: case Builtin::BI__builtin_ssub_overflow: case Builtin::BI__builtin_ssubl_overflow: case Builtin::BI__builtin_ssubll_overflow: case Builtin::BI__builtin_smul_overflow: case Builtin::BI__builtin_smull_overflow: case Builtin::BI__builtin_smulll_overflow: { // We translate all of these builtins directly to the relevant llvm IR node. // Scalarize our inputs. llvm::Value *X = EmitScalarExpr(E->getArg(0)); llvm::Value *Y = EmitScalarExpr(E->getArg(1)); Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); // Decide which of the overflow intrinsics we are lowering to: llvm::Intrinsic::ID IntrinsicId; switch (BuiltinID) { default: llvm_unreachable("Unknown overflow builtin id."); case Builtin::BI__builtin_uadd_overflow: case Builtin::BI__builtin_uaddl_overflow: case Builtin::BI__builtin_uaddll_overflow: IntrinsicId = llvm::Intrinsic::uadd_with_overflow; break; case Builtin::BI__builtin_usub_overflow: case Builtin::BI__builtin_usubl_overflow: case Builtin::BI__builtin_usubll_overflow: IntrinsicId = llvm::Intrinsic::usub_with_overflow; break; case Builtin::BI__builtin_umul_overflow: case Builtin::BI__builtin_umull_overflow: case Builtin::BI__builtin_umulll_overflow: IntrinsicId = llvm::Intrinsic::umul_with_overflow; break; case Builtin::BI__builtin_sadd_overflow: case Builtin::BI__builtin_saddl_overflow: case Builtin::BI__builtin_saddll_overflow: IntrinsicId = llvm::Intrinsic::sadd_with_overflow; break; case Builtin::BI__builtin_ssub_overflow: case Builtin::BI__builtin_ssubl_overflow: case Builtin::BI__builtin_ssubll_overflow: IntrinsicId = llvm::Intrinsic::ssub_with_overflow; break; case Builtin::BI__builtin_smul_overflow: case Builtin::BI__builtin_smull_overflow: case Builtin::BI__builtin_smulll_overflow: IntrinsicId = llvm::Intrinsic::smul_with_overflow; break; } llvm::Value *Carry; llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); Builder.CreateStore(Sum, SumOutPtr); return RValue::get(Carry); } case Builtin::BIaddressof: case Builtin::BI__addressof: case Builtin::BI__builtin_addressof: return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); case Builtin::BI__builtin_function_start: return RValue::get(CGM.GetFunctionStart( E->getArg(0)->getAsBuiltinConstantDeclRef(CGM.getContext()))); case Builtin::BI__builtin_operator_new: return EmitBuiltinNewDeleteCall( E->getCallee()->getType()->castAs(), E, false); case Builtin::BI__builtin_operator_delete: EmitBuiltinNewDeleteCall( E->getCallee()->getType()->castAs(), E, true); return RValue::get(nullptr); case 
Builtin::BI__builtin_is_aligned: return EmitBuiltinIsAligned(E); case Builtin::BI__builtin_align_up: return EmitBuiltinAlignTo(E, true); case Builtin::BI__builtin_align_down: return EmitBuiltinAlignTo(E, false); case Builtin::BI__noop: // __noop always evaluates to an integer literal zero. return RValue::get(ConstantInt::get(IntTy, 0)); case Builtin::BI__builtin_call_with_static_chain: { const CallExpr *Call = cast(E->getArg(0)); const Expr *Chain = E->getArg(1); return EmitCall(Call->getCallee()->getType(), EmitCallee(Call->getCallee()), Call, ReturnValue, EmitScalarExpr(Chain)); } case Builtin::BI_InterlockedExchange8: case Builtin::BI_InterlockedExchange16: case Builtin::BI_InterlockedExchange: case Builtin::BI_InterlockedExchangePointer: return RValue::get( EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E)); case Builtin::BI_InterlockedCompareExchangePointer: case Builtin::BI_InterlockedCompareExchangePointer_nf: { llvm::Type *RTy; llvm::IntegerType *IntType = IntegerType::get( getLLVMContext(), getContext().getTypeSize(E->getType())); Address DestAddr = CheckAtomicAlignment(*this, E); llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); RTy = Exchange->getType(); Exchange = Builder.CreatePtrToInt(Exchange, IntType); llvm::Value *Comparand = Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); auto Ordering = BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ? AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent; auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange, Ordering, Ordering); Result->setVolatile(true); return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 0), RTy)); } case Builtin::BI_InterlockedCompareExchange8: case Builtin::BI_InterlockedCompareExchange16: case Builtin::BI_InterlockedCompareExchange: case Builtin::BI_InterlockedCompareExchange64: return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E)); case Builtin::BI_InterlockedIncrement16: case Builtin::BI_InterlockedIncrement: return RValue::get( EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E)); case Builtin::BI_InterlockedDecrement16: case Builtin::BI_InterlockedDecrement: return RValue::get( EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E)); case Builtin::BI_InterlockedAnd8: case Builtin::BI_InterlockedAnd16: case Builtin::BI_InterlockedAnd: return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E)); case Builtin::BI_InterlockedExchangeAdd8: case Builtin::BI_InterlockedExchangeAdd16: case Builtin::BI_InterlockedExchangeAdd: return RValue::get( EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E)); case Builtin::BI_InterlockedExchangeSub8: case Builtin::BI_InterlockedExchangeSub16: case Builtin::BI_InterlockedExchangeSub: return RValue::get( EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E)); case Builtin::BI_InterlockedOr8: case Builtin::BI_InterlockedOr16: case Builtin::BI_InterlockedOr: return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E)); case Builtin::BI_InterlockedXor8: case Builtin::BI_InterlockedXor16: case Builtin::BI_InterlockedXor: return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E)); case Builtin::BI_bittest64: case Builtin::BI_bittest: case Builtin::BI_bittestandcomplement64: case Builtin::BI_bittestandcomplement: case Builtin::BI_bittestandreset64: case Builtin::BI_bittestandreset: case Builtin::BI_bittestandset64: case Builtin::BI_bittestandset: case Builtin::BI_interlockedbittestandreset: case 
Builtin::BI_interlockedbittestandreset64: case Builtin::BI_interlockedbittestandset64: case Builtin::BI_interlockedbittestandset: case Builtin::BI_interlockedbittestandset_acq: case Builtin::BI_interlockedbittestandset_rel: case Builtin::BI_interlockedbittestandset_nf: case Builtin::BI_interlockedbittestandreset_acq: case Builtin::BI_interlockedbittestandreset_rel: case Builtin::BI_interlockedbittestandreset_nf: return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E)); // These builtins exist to emit regular volatile loads and stores not // affected by the -fms-volatile setting. case Builtin::BI__iso_volatile_load8: case Builtin::BI__iso_volatile_load16: case Builtin::BI__iso_volatile_load32: case Builtin::BI__iso_volatile_load64: return RValue::get(EmitISOVolatileLoad(*this, E)); case Builtin::BI__iso_volatile_store8: case Builtin::BI__iso_volatile_store16: case Builtin::BI__iso_volatile_store32: case Builtin::BI__iso_volatile_store64: return RValue::get(EmitISOVolatileStore(*this, E)); case Builtin::BI__builtin_ptrauth_sign_constant: return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType())); case Builtin::BI__builtin_ptrauth_auth: case Builtin::BI__builtin_ptrauth_auth_and_resign: case Builtin::BI__builtin_ptrauth_blend_discriminator: case Builtin::BI__builtin_ptrauth_sign_generic_data: case Builtin::BI__builtin_ptrauth_sign_unauthenticated: case Builtin::BI__builtin_ptrauth_strip: { // Emit the arguments. SmallVector Args; for (auto argExpr : E->arguments()) Args.push_back(EmitScalarExpr(argExpr)); // Cast the value to intptr_t, saving its original type. llvm::Type *OrigValueType = Args[0]->getType(); if (OrigValueType->isPointerTy()) Args[0] = Builder.CreatePtrToInt(Args[0], IntPtrTy); switch (BuiltinID) { case Builtin::BI__builtin_ptrauth_auth_and_resign: if (Args[4]->getType()->isPointerTy()) Args[4] = Builder.CreatePtrToInt(Args[4], IntPtrTy); [[fallthrough]]; case Builtin::BI__builtin_ptrauth_auth: case Builtin::BI__builtin_ptrauth_sign_unauthenticated: if (Args[2]->getType()->isPointerTy()) Args[2] = Builder.CreatePtrToInt(Args[2], IntPtrTy); break; case Builtin::BI__builtin_ptrauth_sign_generic_data: if (Args[1]->getType()->isPointerTy()) Args[1] = Builder.CreatePtrToInt(Args[1], IntPtrTy); break; case Builtin::BI__builtin_ptrauth_blend_discriminator: case Builtin::BI__builtin_ptrauth_strip: break; } // Call the intrinsic. 
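    // For instance, __builtin_ptrauth_auth(p, key, disc) with pointer-typed
    // 'p' and 'disc' ends up as roughly:
    //   %v = ptrtoint ptr %p to i64
    //   %d = ptrtoint ptr %disc to i64
    //   %r = call i64 @llvm.ptrauth.auth(i64 %v, i32 <key>, i64 %d)
    //   %p.auth = inttoptr i64 %r to ptr
    // with the casts supplied by the argument/result fix-ups around the call.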
auto IntrinsicID = [&]() -> unsigned { switch (BuiltinID) { case Builtin::BI__builtin_ptrauth_auth: return llvm::Intrinsic::ptrauth_auth; case Builtin::BI__builtin_ptrauth_auth_and_resign: return llvm::Intrinsic::ptrauth_resign; case Builtin::BI__builtin_ptrauth_blend_discriminator: return llvm::Intrinsic::ptrauth_blend; case Builtin::BI__builtin_ptrauth_sign_generic_data: return llvm::Intrinsic::ptrauth_sign_generic; case Builtin::BI__builtin_ptrauth_sign_unauthenticated: return llvm::Intrinsic::ptrauth_sign; case Builtin::BI__builtin_ptrauth_strip: return llvm::Intrinsic::ptrauth_strip; } llvm_unreachable("bad ptrauth intrinsic"); }(); auto Intrinsic = CGM.getIntrinsic(IntrinsicID); llvm::Value *Result = EmitRuntimeCall(Intrinsic, Args); if (BuiltinID != Builtin::BI__builtin_ptrauth_sign_generic_data && BuiltinID != Builtin::BI__builtin_ptrauth_blend_discriminator && OrigValueType->isPointerTy()) { Result = Builder.CreateIntToPtr(Result, OrigValueType); } return RValue::get(Result); } case Builtin::BI__exception_code: case Builtin::BI_exception_code: return RValue::get(EmitSEHExceptionCode()); case Builtin::BI__exception_info: case Builtin::BI_exception_info: return RValue::get(EmitSEHExceptionInfo()); case Builtin::BI__abnormal_termination: case Builtin::BI_abnormal_termination: return RValue::get(EmitSEHAbnormalTermination()); case Builtin::BI_setjmpex: if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 && E->getArg(0)->getType()->isPointerType()) return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); break; case Builtin::BI_setjmp: if (getTarget().getTriple().isOSMSVCRT() && E->getNumArgs() == 1 && E->getArg(0)->getType()->isPointerType()) { if (getTarget().getTriple().getArch() == llvm::Triple::x86) return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E); else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64) return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E); return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E); } break; // C++ std:: builtins. 
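// BImove, BImove_if_noexcept, BIforward, BIforward_like and BIas_const are
// pure casts at the language level, so no call is emitted: the address of the
// argument lvalue is returned directly.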
case Builtin::BImove: case Builtin::BImove_if_noexcept: case Builtin::BIforward: case Builtin::BIforward_like: case Builtin::BIas_const: return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); case Builtin::BI__GetExceptionInfo: { if (llvm::GlobalVariable *GV = CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) return RValue::get(GV); break; } case Builtin::BI__fastfail: return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E)); case Builtin::BI__builtin_coro_id: return EmitCoroutineIntrinsic(E, Intrinsic::coro_id); case Builtin::BI__builtin_coro_promise: return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise); case Builtin::BI__builtin_coro_resume: EmitCoroutineIntrinsic(E, Intrinsic::coro_resume); return RValue::get(nullptr); case Builtin::BI__builtin_coro_frame: return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame); case Builtin::BI__builtin_coro_noop: return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop); case Builtin::BI__builtin_coro_free: return EmitCoroutineIntrinsic(E, Intrinsic::coro_free); case Builtin::BI__builtin_coro_destroy: EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy); return RValue::get(nullptr); case Builtin::BI__builtin_coro_done: return EmitCoroutineIntrinsic(E, Intrinsic::coro_done); case Builtin::BI__builtin_coro_alloc: return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc); case Builtin::BI__builtin_coro_begin: return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin); case Builtin::BI__builtin_coro_end: return EmitCoroutineIntrinsic(E, Intrinsic::coro_end); case Builtin::BI__builtin_coro_suspend: return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend); case Builtin::BI__builtin_coro_size: return EmitCoroutineIntrinsic(E, Intrinsic::coro_size); case Builtin::BI__builtin_coro_align: return EmitCoroutineIntrinsic(E, Intrinsic::coro_align); // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions case Builtin::BIread_pipe: case Builtin::BIwrite_pipe: { Value *Arg0 = EmitScalarExpr(E->getArg(0)), *Arg1 = EmitScalarExpr(E->getArg(1)); CGOpenCLRuntime OpenCLRT(CGM); Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); // Type of the generic packet parameter. unsigned GenericAS = getContext().getTargetAddressSpace(LangAS::opencl_generic); llvm::Type *I8PTy = llvm::PointerType::get(getLLVMContext(), GenericAS); // Testing which overloaded version we should generate the call for. if (2U == E->getNumArgs()) { const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" : "__write_pipe_2"; // Creating a generic function type to be able to call with any builtin or // user defined type. llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef(ArgTys), false); Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); return RValue::get( EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast, PacketSize, PacketAlign})); } else { assert(4 == E->getNumArgs() && "Illegal number of parameters to pipe function"); const char *Name = (BuiltinID == Builtin::BIread_pipe) ? 
"__read_pipe_4" : "__write_pipe_4"; llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy, Int32Ty, Int32Ty}; Value *Arg2 = EmitScalarExpr(E->getArg(2)), *Arg3 = EmitScalarExpr(E->getArg(3)); llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef(ArgTys), false); Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); // We know the third argument is an integer type, but we may need to cast // it to i32. if (Arg2->getType() != Int32Ty) Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); return RValue::get( EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign})); } } // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write // functions case Builtin::BIreserve_read_pipe: case Builtin::BIreserve_write_pipe: case Builtin::BIwork_group_reserve_read_pipe: case Builtin::BIwork_group_reserve_write_pipe: case Builtin::BIsub_group_reserve_read_pipe: case Builtin::BIsub_group_reserve_write_pipe: { // Composing the mangled name for the function. const char *Name; if (BuiltinID == Builtin::BIreserve_read_pipe) Name = "__reserve_read_pipe"; else if (BuiltinID == Builtin::BIreserve_write_pipe) Name = "__reserve_write_pipe"; else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) Name = "__work_group_reserve_read_pipe"; else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) Name = "__work_group_reserve_write_pipe"; else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) Name = "__sub_group_reserve_read_pipe"; else Name = "__sub_group_reserve_write_pipe"; Value *Arg0 = EmitScalarExpr(E->getArg(0)), *Arg1 = EmitScalarExpr(E->getArg(1)); llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); CGOpenCLRuntime OpenCLRT(CGM); Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); // Building the generic function prototype. llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty}; llvm::FunctionType *FTy = llvm::FunctionType::get( ReservedIDTy, llvm::ArrayRef(ArgTys), false); // We know the second argument is an integer type, but we may need to cast // it to i32. if (Arg1->getType() != Int32Ty) Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, PacketSize, PacketAlign})); } // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write // functions case Builtin::BIcommit_read_pipe: case Builtin::BIcommit_write_pipe: case Builtin::BIwork_group_commit_read_pipe: case Builtin::BIwork_group_commit_write_pipe: case Builtin::BIsub_group_commit_read_pipe: case Builtin::BIsub_group_commit_write_pipe: { const char *Name; if (BuiltinID == Builtin::BIcommit_read_pipe) Name = "__commit_read_pipe"; else if (BuiltinID == Builtin::BIcommit_write_pipe) Name = "__commit_write_pipe"; else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) Name = "__work_group_commit_read_pipe"; else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) Name = "__work_group_commit_write_pipe"; else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) Name = "__sub_group_commit_read_pipe"; else Name = "__sub_group_commit_write_pipe"; Value *Arg0 = EmitScalarExpr(E->getArg(0)), *Arg1 = EmitScalarExpr(E->getArg(1)); CGOpenCLRuntime OpenCLRT(CGM); Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); // Building the generic function prototype. 
llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty}; llvm::FunctionType *FTy = llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), llvm::ArrayRef(ArgTys), false); return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, PacketSize, PacketAlign})); } // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions case Builtin::BIget_pipe_num_packets: case Builtin::BIget_pipe_max_packets: { const char *BaseName; const auto *PipeTy = E->getArg(0)->getType()->castAs(); if (BuiltinID == Builtin::BIget_pipe_num_packets) BaseName = "__get_pipe_num_packets"; else BaseName = "__get_pipe_max_packets"; std::string Name = std::string(BaseName) + std::string(PipeTy->isReadOnly() ? "_ro" : "_wo"); // Building the generic function prototype. Value *Arg0 = EmitScalarExpr(E->getArg(0)); CGOpenCLRuntime OpenCLRT(CGM); Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0)); Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0)); llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef(ArgTys), false); return RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, PacketSize, PacketAlign})); } // OpenCL v2.0 s6.13.9 - Address space qualifier functions. case Builtin::BIto_global: case Builtin::BIto_local: case Builtin::BIto_private: { auto Arg0 = EmitScalarExpr(E->getArg(0)); auto NewArgT = llvm::PointerType::get( getLLVMContext(), CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); auto NewRetT = llvm::PointerType::get( getLLVMContext(), CGM.getContext().getTargetAddressSpace( E->getType()->getPointeeType().getAddressSpace())); auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); llvm::Value *NewArg; if (Arg0->getType()->getPointerAddressSpace() != NewArgT->getPointerAddressSpace()) NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); else NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); auto NewName = std::string("__") + E->getDirectCallee()->getName().str(); auto NewCall = EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg}); return RValue::get(Builder.CreateBitOrPointerCast(NewCall, ConvertType(E->getType()))); } // OpenCL v2.0, s6.13.17 - Enqueue kernel function. // Table 6.13.17.1 specifies four overload forms of enqueue_kernel. 
// The code below expands the builtin call to a call to one of the following // functions that an OpenCL runtime library will have to provide: // __enqueue_kernel_basic // __enqueue_kernel_varargs // __enqueue_kernel_basic_events // __enqueue_kernel_events_varargs case Builtin::BIenqueue_kernel: { StringRef Name; // Generated function call name unsigned NumArgs = E->getNumArgs(); llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); LValue NDRangeL = EmitAggExprToLValue(E->getArg(2)); llvm::Value *Range = NDRangeL.getAddress().emitRawPointer(*this); llvm::Type *RangeTy = NDRangeL.getAddress().getType(); if (NumArgs == 4) { // The most basic form of the call with parameters: // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) Name = "__enqueue_kernel_basic"; llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy, GenericVoidPtrTy}; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef(ArgTys), false); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); llvm::Value *Kernel = Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); llvm::Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); AttrBuilder B(Builder.getContext()); B.addByValAttr(NDRangeL.getAddress().getElementType()); llvm::AttributeList ByValAttrSet = llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B); auto RTCall = EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet), {Queue, Flags, Range, Kernel, Block}); RTCall->setAttributes(ByValAttrSet); return RValue::get(RTCall); } assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); // Create a temporary array to hold the sizes of local pointer arguments // for the block. \p First is the position of the first size argument. auto CreateArrayForSizeVar = [=](unsigned First) -> std::tuple { llvm::APInt ArraySize(32, NumArgs - First); QualType SizeArrayTy = getContext().getConstantArrayType( getContext().getSizeType(), ArraySize, nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes"); llvm::Value *TmpPtr = Tmp.getPointer(); llvm::Value *TmpSize = EmitLifetimeStart( CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr); llvm::Value *ElemPtr; // Each of the following arguments specifies the size of the corresponding // argument passed to the enqueued block. auto *Zero = llvm::ConstantInt::get(IntTy, 0); for (unsigned I = First; I < NumArgs; ++I) { auto *Index = llvm::ConstantInt::get(IntTy, I - First); auto *GEP = Builder.CreateGEP(Tmp.getElementType(), TmpPtr, {Zero, Index}); if (I == First) ElemPtr = GEP; auto *V = Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy); Builder.CreateAlignedStore( V, GEP, CGM.getDataLayout().getPrefTypeAlign(SizeTy)); } return std::tie(ElemPtr, TmpSize, TmpPtr); }; // Could have events and/or varargs. if (E->getArg(3)->getType()->isBlockPointerType()) { // No events passed, but has variadic arguments. 
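// The *_varargs forms pass the runtime the number of trailing size arguments
// plus a pointer to the temporary "block_sizes" array built by
// CreateArrayForSizeVar above; each entry gives the size of one local pointer
// argument of the enqueued block.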
Name = "__enqueue_kernel_varargs"; auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3)); llvm::Value *Kernel = Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); llvm::Value *ElemPtr, *TmpSize, *TmpPtr; std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4); // Create a vector of the arguments, as well as a constant value to // express to the runtime the number of variadic arguments. llvm::Value *const Args[] = {Queue, Flags, Range, Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4), ElemPtr}; llvm::Type *const ArgTys[] = { QueueTy, IntTy, RangeTy, GenericVoidPtrTy, GenericVoidPtrTy, IntTy, ElemPtr->getType()}; llvm::FunctionType *FTy = llvm::FunctionType::get(Int32Ty, ArgTys, false); auto Call = RValue::get( EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Args)); if (TmpSize) EmitLifetimeEnd(TmpSize, TmpPtr); return Call; } // Any calls now have event arguments passed. if (NumArgs >= 7) { llvm::PointerType *PtrTy = llvm::PointerType::get( CGM.getLLVMContext(), CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); llvm::Value *NumEvents = Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty); // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments // to be a null pointer constant (including `0` literal), we can take it // into account and emit null pointer directly. llvm::Value *EventWaitList = nullptr; if (E->getArg(4)->isNullPointerConstant( getContext(), Expr::NPC_ValueDependentIsNotNull)) { EventWaitList = llvm::ConstantPointerNull::get(PtrTy); } else { EventWaitList = E->getArg(4)->getType()->isArrayType() ? EmitArrayToPointerDecay(E->getArg(4)).emitRawPointer(*this) : EmitScalarExpr(E->getArg(4)); // Convert to generic address space. EventWaitList = Builder.CreatePointerCast(EventWaitList, PtrTy); } llvm::Value *EventRet = nullptr; if (E->getArg(5)->isNullPointerConstant( getContext(), Expr::NPC_ValueDependentIsNotNull)) { EventRet = llvm::ConstantPointerNull::get(PtrTy); } else { EventRet = Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), PtrTy); } auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6)); llvm::Value *Kernel = Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); llvm::Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); std::vector ArgTys = { QueueTy, Int32Ty, RangeTy, Int32Ty, PtrTy, PtrTy, GenericVoidPtrTy, GenericVoidPtrTy}; std::vector Args = {Queue, Flags, Range, NumEvents, EventWaitList, EventRet, Kernel, Block}; if (NumArgs == 7) { // Has events but no variadics. Name = "__enqueue_kernel_basic_events"; llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef(ArgTys), false); return RValue::get( EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), llvm::ArrayRef(Args))); } // Has event info and variadics // Pass the number of variadics to the runtime function too. 
Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); ArgTys.push_back(Int32Ty); Name = "__enqueue_kernel_events_varargs"; llvm::Value *ElemPtr, *TmpSize, *TmpPtr; std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7); Args.push_back(ElemPtr); ArgTys.push_back(ElemPtr->getType()); llvm::FunctionType *FTy = llvm::FunctionType::get( Int32Ty, llvm::ArrayRef(ArgTys), false); auto Call = RValue::get(EmitRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), llvm::ArrayRef(Args))); if (TmpSize) EmitLifetimeEnd(TmpSize, TmpPtr); return Call; } llvm_unreachable("Unexpected enqueue_kernel signature"); } // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block // parameter. case Builtin::BIget_kernel_work_group_size: { llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); Value *Kernel = Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); return RValue::get(EmitRuntimeCall( CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, false), "__get_kernel_work_group_size_impl"), {Kernel, Arg})); } case Builtin::BIget_kernel_preferred_work_group_size_multiple: { llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0)); Value *Kernel = Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); return RValue::get(EmitRuntimeCall( CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy}, false), "__get_kernel_preferred_work_group_size_multiple_impl"), {Kernel, Arg})); } case Builtin::BIget_kernel_max_sub_group_size_for_ndrange: case Builtin::BIget_kernel_sub_group_count_for_ndrange: { llvm::Type *GenericVoidPtrTy = Builder.getPtrTy( getContext().getTargetAddressSpace(LangAS::opencl_generic)); LValue NDRangeL = EmitAggExprToLValue(E->getArg(0)); llvm::Value *NDRange = NDRangeL.getAddress().emitRawPointer(*this); auto Info = CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1)); Value *Kernel = Builder.CreatePointerCast(Info.KernelHandle, GenericVoidPtrTy); Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy); const char *Name = BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange ? 
"__get_kernel_max_sub_group_size_for_ndrange_impl" : "__get_kernel_sub_group_count_for_ndrange_impl"; return RValue::get(EmitRuntimeCall( CGM.CreateRuntimeFunction( llvm::FunctionType::get( IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy}, false), Name), {NDRange, Kernel, Block})); } case Builtin::BI__builtin_store_half: case Builtin::BI__builtin_store_halff: { Value *Val = EmitScalarExpr(E->getArg(0)); Address Address = EmitPointerWithAlignment(E->getArg(1)); Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); Builder.CreateStore(HalfVal, Address); return RValue::get(nullptr); } case Builtin::BI__builtin_load_half: { Address Address = EmitPointerWithAlignment(E->getArg(0)); Value *HalfVal = Builder.CreateLoad(Address); return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy())); } case Builtin::BI__builtin_load_halff: { Address Address = EmitPointerWithAlignment(E->getArg(0)); Value *HalfVal = Builder.CreateLoad(Address); return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); } case Builtin::BI__builtin_printf: case Builtin::BIprintf: if (getTarget().getTriple().isNVPTX() || getTarget().getTriple().isAMDGCN() || (getTarget().getTriple().isSPIRV() && getTarget().getTriple().getVendor() == Triple::VendorType::AMD)) { if (getLangOpts().OpenMPIsTargetDevice) return EmitOpenMPDevicePrintfCallExpr(E); if (getTarget().getTriple().isNVPTX()) return EmitNVPTXDevicePrintfCallExpr(E); if ((getTarget().getTriple().isAMDGCN() || getTarget().getTriple().isSPIRV()) && getLangOpts().HIP) return EmitAMDGPUDevicePrintfCallExpr(E); } break; case Builtin::BI__builtin_canonicalize: case Builtin::BI__builtin_canonicalizef: case Builtin::BI__builtin_canonicalizef16: case Builtin::BI__builtin_canonicalizel: return RValue::get( emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::canonicalize)); case Builtin::BI__builtin_thread_pointer: { if (!getContext().getTargetInfo().isTLSSupported()) CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); // Fall through - it's already mapped to the intrinsic by ClangBuiltin. break; } case Builtin::BI__builtin_os_log_format: return emitBuiltinOSLogFormat(*E); case Builtin::BI__xray_customevent: { if (!ShouldXRayInstrumentFunction()) return RValue::getIgnored(); if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has( XRayInstrKind::Custom)) return RValue::getIgnored(); if (const auto *XRayAttr = CurFuncDecl->getAttr()) if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents()) return RValue::getIgnored(); Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent); auto FTy = F->getFunctionType(); auto Arg0 = E->getArg(0); auto Arg0Val = EmitScalarExpr(Arg0); auto Arg0Ty = Arg0->getType(); auto PTy0 = FTy->getParamType(0); if (PTy0 != Arg0Val->getType()) { if (Arg0Ty->isArrayType()) Arg0Val = EmitArrayToPointerDecay(Arg0).emitRawPointer(*this); else Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0); } auto Arg1 = EmitScalarExpr(E->getArg(1)); auto PTy1 = FTy->getParamType(1); if (PTy1 != Arg1->getType()) Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1); return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1})); } case Builtin::BI__xray_typedevent: { // TODO: There should be a way to always emit events even if the current // function is not instrumented. Losing events in a stream can cripple // a trace. 
    if (!ShouldXRayInstrumentFunction())
      return RValue::getIgnored();

    if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
            XRayInstrKind::Typed))
      return RValue::getIgnored();

    if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
      if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
        return RValue::getIgnored();

    Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
    auto FTy = F->getFunctionType();
    auto Arg0 = EmitScalarExpr(E->getArg(0));
    auto PTy0 = FTy->getParamType(0);
    if (PTy0 != Arg0->getType())
      Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
    auto Arg1 = E->getArg(1);
    auto Arg1Val = EmitScalarExpr(Arg1);
    auto Arg1Ty = Arg1->getType();
    auto PTy1 = FTy->getParamType(1);
    if (PTy1 != Arg1Val->getType()) {
      if (Arg1Ty->isArrayType())
        Arg1Val = EmitArrayToPointerDecay(Arg1).emitRawPointer(*this);
      else
        Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
    }
    auto Arg2 = EmitScalarExpr(E->getArg(2));
    auto PTy2 = FTy->getParamType(2);
    if (PTy2 != Arg2->getType())
      Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
    return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
  }
  case Builtin::BI__builtin_ms_va_start:
  case Builtin::BI__builtin_ms_va_end:
    return RValue::get(
        EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).emitRawPointer(*this),
                       BuiltinID == Builtin::BI__builtin_ms_va_start));
  case Builtin::BI__builtin_ms_va_copy: {
    // Lower this manually. We can't reliably determine whether or not any
    // given va_copy() is for a Win64 va_list from the calling convention
    // alone, because it's legal to do this from a System V ABI function.
    // With opaque pointer types, we won't have enough information in LLVM
    // IR to determine this from the argument types, either. Best to do it
    // now, while we have enough information.
    Address DestAddr = EmitMSVAListRef(E->getArg(0));
    Address SrcAddr = EmitMSVAListRef(E->getArg(1));

    DestAddr = DestAddr.withElementType(Int8PtrTy);
    SrcAddr = SrcAddr.withElementType(Int8PtrTy);

    Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
    return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
  }
  case Builtin::BI__builtin_get_device_side_mangled_name: {
    auto Name = CGM.getCUDARuntime().getDeviceSideName(
        cast<DeclRefExpr>(E->getArg(0)->IgnoreImpCasts())->getDecl());
    auto Str = CGM.GetAddrOfConstantCString(Name, "");
    return RValue::get(Str.getPointer());
  }
  }

  // If this is an alias for a lib function (e.g. __builtin_sin), emit
  // the call using the normal call path, but using the unmangled
  // version of the function name.
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E,
                           CGM.getBuiltinLibFunction(FD, BuiltinID));

  // If this is a predefined lib function (e.g. malloc), emit the call
  // using exactly the normal call path.
  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E, CGM.getRawFunctionPointer(FD));

  // Check that a call to a target specific builtin has the correct target
  // features.
  // This is down here to avoid non-target specific builtins, however, if
  // generic builtins start to require generic target features then we
  // can move this up to the beginning of the function.
  checkTargetFeatures(E, FD);

  if (unsigned VectorWidth =
          getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
    LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);

  // See if we have a target specific intrinsic.
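  // The builtin name is mapped to a target intrinsic below via
  // Intrinsic::getIntrinsicForClangBuiltin, keyed on the target's arch prefix,
  // with a fallback to the MS-builtin table when that lookup fails.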
  StringRef Name = getContext().BuiltinInfo.getName(BuiltinID);
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
  StringRef Prefix =
      llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
  if (!Prefix.empty()) {
    IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin(Prefix.data(), Name);
    if (IntrinsicID == Intrinsic::not_intrinsic && Prefix == "spv" &&
        getTarget().getTriple().getOS() == llvm::Triple::OSType::AMDHSA)
      IntrinsicID = Intrinsic::getIntrinsicForClangBuiltin("amdgcn", Name);
    // NOTE we don't need to perform a compatibility flag check here since the
    // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
    // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
    if (IntrinsicID == Intrinsic::not_intrinsic)
      IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
  }

  if (IntrinsicID != Intrinsic::not_intrinsic) {
    SmallVector<Value *> Args;

    // Find out if any arguments are required to be integer constant
    // expressions.
    unsigned ICEArguments = 0;
    ASTContext::GetBuiltinTypeError Error;
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
    assert(Error == ASTContext::GE_None && "Should not codegen an error");

    Function *F = CGM.getIntrinsic(IntrinsicID);
    llvm::FunctionType *FTy = F->getFunctionType();

    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
      Value *ArgValue = EmitScalarOrConstFoldImmArg(ICEArguments, i, E);
      // If the intrinsic arg type is different from the builtin arg type
      // we need to do a bit cast.
      llvm::Type *PTy = FTy->getParamType(i);
      if (PTy != ArgValue->getType()) {
        // XXX - vector of pointers?
        if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
          if (PtrTy->getAddressSpace() !=
              ArgValue->getType()->getPointerAddressSpace()) {
            ArgValue = Builder.CreateAddrSpaceCast(
                ArgValue, llvm::PointerType::get(getLLVMContext(),
                                                 PtrTy->getAddressSpace()));
          }
        }

        // Cast vector type (e.g., v256i32) to x86_amx, this only happen
        // in amx intrinsics.
        if (PTy->isX86_AMXTy())
          ArgValue = Builder.CreateIntrinsic(Intrinsic::x86_cast_vector_to_tile,
                                             {ArgValue->getType()}, {ArgValue});
        else
          ArgValue = Builder.CreateBitCast(ArgValue, PTy);
      }

      Args.push_back(ArgValue);
    }

    Value *V = Builder.CreateCall(F, Args);
    QualType BuiltinRetType = E->getType();

    llvm::Type *RetTy = VoidTy;
    if (!BuiltinRetType->isVoidType())
      RetTy = ConvertType(BuiltinRetType);

    if (RetTy != V->getType()) {
      // XXX - vector of pointers?
      if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
        if (PtrTy->getAddressSpace() !=
            V->getType()->getPointerAddressSpace()) {
          V = Builder.CreateAddrSpaceCast(
              V, llvm::PointerType::get(getLLVMContext(),
                                        PtrTy->getAddressSpace()));
        }
      }

      // Cast x86_amx to vector type (e.g., v256i32), this only happen
      // in amx intrinsics.
      if (V->getType()->isX86_AMXTy())
        V = Builder.CreateIntrinsic(Intrinsic::x86_cast_tile_to_vector, {RetTy},
                                    {V});
      else
        V = Builder.CreateBitCast(V, RetTy);
    }

    if (RetTy->isVoidTy())
      return RValue::get(nullptr);

    return RValue::get(V);
  }

  // Some target-specific builtins can have aggregate return values, e.g.
  // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
  // ReturnValue to be non-null, so that the target-specific emission code can
  // always just emit into it.
  TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
  if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
    Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
    ReturnValue = ReturnValueSlot(DestPtr, false);
  }

  // Now see if we can emit a target-specific builtin.
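  // Scalar results come back as the call's value (void becomes a null RValue),
  // aggregate results are returned through the ReturnValue slot set up above,
  // and no target builtin currently produces a complex value.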
if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) { switch (EvalKind) { case TEK_Scalar: if (V->getType()->isVoidTy()) return RValue::get(nullptr); return RValue::get(V); case TEK_Aggregate: return RValue::getAggregate(ReturnValue.getAddress(), ReturnValue.isVolatile()); case TEK_Complex: llvm_unreachable("No current target builtin returns complex"); } llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); } // EmitHLSLBuiltinExpr will check getLangOpts().HLSL if (Value *V = EmitHLSLBuiltinExpr(BuiltinID, E)) return RValue::get(V); if (getLangOpts().HIPStdPar && getLangOpts().CUDAIsDevice) return EmitHipStdParUnsupportedBuiltin(this, FD); ErrorUnsupported(E, "builtin function"); // Unknown builtin, for now just dump it out and return undef. return GetUndefRValue(E->getType()); } static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch) { // When compiling in HipStdPar mode we have to be conservative in rejecting // target specific features in the FE, and defer the possible error to the // AcceleratorCodeSelection pass, wherein iff an unsupported target builtin is // referenced by an accelerator executable function, we emit an error. // Returning nullptr here leads to the builtin being handled in // EmitStdParUnsupportedBuiltin. if (CGF->getLangOpts().HIPStdPar && CGF->getLangOpts().CUDAIsDevice && Arch != CGF->getTarget().getTriple().getArch()) return nullptr; switch (Arch) { case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch); case llvm::Triple::aarch64: case llvm::Triple::aarch64_32: case llvm::Triple::aarch64_be: return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch); case llvm::Triple::bpfeb: case llvm::Triple::bpfel: return CGF->EmitBPFBuiltinExpr(BuiltinID, E); case llvm::Triple::x86: case llvm::Triple::x86_64: return CGF->EmitX86BuiltinExpr(BuiltinID, E); case llvm::Triple::ppc: case llvm::Triple::ppcle: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: return CGF->EmitPPCBuiltinExpr(BuiltinID, E); case llvm::Triple::r600: case llvm::Triple::amdgcn: return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); case llvm::Triple::systemz: return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); case llvm::Triple::nvptx: case llvm::Triple::nvptx64: return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); case llvm::Triple::wasm32: case llvm::Triple::wasm64: return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); case llvm::Triple::hexagon: return CGF->EmitHexagonBuiltinExpr(BuiltinID, E); case llvm::Triple::riscv32: case llvm::Triple::riscv64: return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); case llvm::Triple::spirv64: if (CGF->getTarget().getTriple().getOS() != llvm::Triple::OSType::AMDHSA) return nullptr; return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); default: return nullptr; } } Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { assert(getContext().getAuxTargetInfo() && "Missing aux target info"); return EmitTargetArchBuiltinExpr( this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch()); } return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue, getTarget().getTriple().getArch()); } static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF, NeonTypeFlags 
    TypeFlags, bool HasLegalHalfType = true, bool V1Ty = false,
    bool AllowBFloatArgsAndRet = true) {
  int IsQuad = TypeFlags.isQuad();
  switch (TypeFlags.getEltType()) {
  case NeonTypeFlags::Int8:
  case NeonTypeFlags::Poly8:
    return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
  case NeonTypeFlags::Int16:
  case NeonTypeFlags::Poly16:
    return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::BFloat16:
    if (AllowBFloatArgsAndRet)
      return llvm::FixedVectorType::get(CGF->BFloatTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Float16:
    if (HasLegalHalfType)
      return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
    else
      return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Int64:
  case NeonTypeFlags::Poly64:
    return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
  case NeonTypeFlags::Poly128:
    // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
    // There is a lot of i128 and f128 API missing.
    // so we use v16i8 to represent poly128 and get pattern matched.
    return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
  case NeonTypeFlags::Float32:
    return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
  case NeonTypeFlags::Float64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
  }
  llvm_unreachable("Unknown vector element type!");
}

static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
                                          NeonTypeFlags IntTypeFlags) {
  int IsQuad = IntTypeFlags.isQuad();
  switch (IntTypeFlags.getEltType()) {
  case NeonTypeFlags::Int16:
    return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
  case NeonTypeFlags::Int32:
    return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
  case NeonTypeFlags::Int64:
    return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
  default:
    llvm_unreachable("Type can't be converted to floating-point!");
  }
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C,
                                      const ElementCount &Count) {
  Value *SV = llvm::ConstantVector::getSplat(Count, C);
  return Builder.CreateShuffleVector(V, V, SV, "lane");
}

Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
  ElementCount EC = cast<llvm::FixedVectorType>(V->getType())->getElementCount();
  return EmitNeonSplat(V, C, EC);
}

Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value *> &Ops,
                                     const char *name, unsigned shift,
                                     bool rightshift) {
  unsigned j = 0;
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    if (F->isConstrainedFPIntrinsic())
      if (ai->getType()->isMetadataTy())
        continue;
    if (shift > 0 && shift == j)
      Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
    else
      Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
  }
  if (F->isConstrainedFPIntrinsic())
    return Builder.CreateConstrainedFPCall(F, Ops, name);
  else
    return Builder.CreateCall(F, Ops, name);
}

Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
                                            bool neg) {
  int SV = cast<ConstantInt>(V)->getSExtValue();
  return ConstantInt::get(Ty, neg ? -SV : SV);
}

// Right-shift a vector by a constant.
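// NEON right-shift immediates may equal the element width, but LLVM's
// lshr/ashr treat a shift by the full width as undefined, so the helper below
// folds that case to zero (unsigned) or to a shift by size-1 (signed).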
Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, llvm::Type *Ty, bool usgn, const char *name) { llvm::VectorType *VTy = cast(Ty); int ShiftAmt = cast(Shift)->getSExtValue(); int EltSize = VTy->getScalarSizeInBits(); Vec = Builder.CreateBitCast(Vec, Ty); // lshr/ashr are undefined when the shift amount is equal to the vector // element size. if (ShiftAmt == EltSize) { if (usgn) { // Right-shifting an unsigned value by its size yields 0. return llvm::ConstantAggregateZero::get(VTy); } else { // Right-shifting a signed value by its size is equivalent // to a shift of size-1. --ShiftAmt; Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); } } Shift = EmitNeonShiftVector(Shift, Ty, false); if (usgn) return Builder.CreateLShr(Vec, Shift, name); else return Builder.CreateAShr(Vec, Shift, name); } enum { AddRetType = (1 << 0), Add1ArgType = (1 << 1), Add2ArgTypes = (1 << 2), VectorizeRetType = (1 << 3), VectorizeArgTypes = (1 << 4), InventFloatType = (1 << 5), UnsignedAlts = (1 << 6), Use64BitVectors = (1 << 7), Use128BitVectors = (1 << 8), Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, VectorRet = AddRetType | VectorizeRetType, VectorRetGetArgs01 = AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, FpCmpzModifiers = AddRetType | VectorizeRetType | Add1ArgType | InventFloatType }; namespace { struct ARMVectorIntrinsicInfo { const char *NameHint; unsigned BuiltinID; unsigned LLVMIntrinsic; unsigned AltLLVMIntrinsic; uint64_t TypeModifier; bool operator<(unsigned RHSBuiltinID) const { return BuiltinID < RHSBuiltinID; } bool operator<(const ARMVectorIntrinsicInfo &TE) const { return BuiltinID < TE.BuiltinID; } }; } // end anonymous namespace #define NEONMAP0(NameBase) \ { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ Intrinsic::LLVMIntrinsic, 0, TypeModifier } #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ TypeModifier } static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = { NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0), NEONMAP0(splat_lane_v), NEONMAP0(splat_laneq_v), NEONMAP0(splatq_lane_v), NEONMAP0(splatq_laneq_v), NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), NEONMAP1(vabs_v, arm_neon_vabs, 0), NEONMAP1(vabsq_v, arm_neon_vabs, 0), NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), NEONMAP0(vaddq_v), NEONMAP1(vaesdq_u8, arm_neon_aesd, 0), NEONMAP1(vaeseq_u8, arm_neon_aese, 0), NEONMAP1(vaesimcq_u8, arm_neon_aesimc, 0), NEONMAP1(vaesmcq_u8, arm_neon_aesmc, 0), NEONMAP1(vbfdot_f32, arm_neon_bfdot, 0), NEONMAP1(vbfdotq_f32, arm_neon_bfdot, 0), NEONMAP1(vbfmlalbq_f32, arm_neon_bfmlalb, 0), NEONMAP1(vbfmlaltq_f32, arm_neon_bfmlalt, 0), NEONMAP1(vbfmmlaq_f32, arm_neon_bfmmla, 0), NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), NEONMAP1(vcadd_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcadd_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcadd_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcadd_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcaddq_rot270_f16, arm_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcaddq_rot270_f32, arm_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcaddq_rot270_f64, 
arm_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcaddq_rot90_f16, arm_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcaddq_rot90_f32, arm_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcaddq_rot90_f64, arm_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcage_v, arm_neon_vacge, 0), NEONMAP1(vcageq_v, arm_neon_vacge, 0), NEONMAP1(vcagt_v, arm_neon_vacgt, 0), NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), NEONMAP1(vcale_v, arm_neon_vacge, 0), NEONMAP1(vcaleq_v, arm_neon_vacge, 0), NEONMAP1(vcalt_v, arm_neon_vacgt, 0), NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), NEONMAP0(vceqz_v), NEONMAP0(vceqzq_v), NEONMAP0(vcgez_v), NEONMAP0(vcgezq_v), NEONMAP0(vcgtz_v), NEONMAP0(vcgtzq_v), NEONMAP0(vclez_v), NEONMAP0(vclezq_v), NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), NEONMAP0(vcltz_v), NEONMAP0(vcltzq_v), NEONMAP1(vclz_v, ctlz, Add1ArgType), NEONMAP1(vclzq_v, ctlz, Add1ArgType), NEONMAP1(vcnt_v, ctpop, Add1ArgType), NEONMAP1(vcntq_v, ctpop, Add1ArgType), NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), NEONMAP0(vcvt_f16_s16), NEONMAP0(vcvt_f16_u16), NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), NEONMAP0(vcvt_f32_v), NEONMAP1(vcvt_n_f16_s16, arm_neon_vcvtfxs2fp, 0), NEONMAP1(vcvt_n_f16_u16, arm_neon_vcvtfxu2fp, 0), NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), NEONMAP1(vcvt_n_s16_f16, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_u16_f16, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), NEONMAP0(vcvt_s16_f16), NEONMAP0(vcvt_s32_v), NEONMAP0(vcvt_s64_v), NEONMAP0(vcvt_u16_f16), NEONMAP0(vcvt_u32_v), NEONMAP0(vcvt_u64_v), NEONMAP1(vcvta_s16_f16, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), NEONMAP1(vcvta_u16_f16, arm_neon_vcvtau, 0), NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), NEONMAP1(vcvtaq_s16_f16, arm_neon_vcvtas, 0), NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), NEONMAP1(vcvtaq_u16_f16, arm_neon_vcvtau, 0), NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), NEONMAP1(vcvth_bf16_f32, arm_neon_vcvtbfp2bf, 0), NEONMAP1(vcvtm_s16_f16, arm_neon_vcvtms, 0), NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), NEONMAP1(vcvtm_u16_f16, arm_neon_vcvtmu, 0), NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), NEONMAP1(vcvtmq_s16_f16, arm_neon_vcvtms, 0), NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), NEONMAP1(vcvtmq_u16_f16, arm_neon_vcvtmu, 0), NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), NEONMAP1(vcvtn_s16_f16, arm_neon_vcvtns, 0), NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), NEONMAP1(vcvtn_u16_f16, arm_neon_vcvtnu, 0), NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), NEONMAP1(vcvtnq_s16_f16, arm_neon_vcvtns, 0), NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), NEONMAP1(vcvtnq_u16_f16, arm_neon_vcvtnu, 0), NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), NEONMAP1(vcvtp_s16_f16, arm_neon_vcvtps, 0), NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), NEONMAP1(vcvtp_u16_f16, arm_neon_vcvtpu, 0), 
NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), NEONMAP1(vcvtpq_s16_f16, arm_neon_vcvtps, 0), NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), NEONMAP1(vcvtpq_u16_f16, arm_neon_vcvtpu, 0), NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), NEONMAP0(vcvtq_f16_s16), NEONMAP0(vcvtq_f16_u16), NEONMAP0(vcvtq_f32_v), NEONMAP1(vcvtq_n_f16_s16, arm_neon_vcvtfxs2fp, 0), NEONMAP1(vcvtq_n_f16_u16, arm_neon_vcvtfxu2fp, 0), NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), NEONMAP1(vcvtq_n_s16_f16, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_u16_f16, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), NEONMAP0(vcvtq_s16_f16), NEONMAP0(vcvtq_s32_v), NEONMAP0(vcvtq_s64_v), NEONMAP0(vcvtq_u16_f16), NEONMAP0(vcvtq_u32_v), NEONMAP0(vcvtq_u64_v), NEONMAP1(vdot_s32, arm_neon_sdot, 0), NEONMAP1(vdot_u32, arm_neon_udot, 0), NEONMAP1(vdotq_s32, arm_neon_sdot, 0), NEONMAP1(vdotq_u32, arm_neon_udot, 0), NEONMAP0(vext_v), NEONMAP0(vextq_v), NEONMAP0(vfma_v), NEONMAP0(vfmaq_v), NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), NEONMAP0(vld1_dup_v), NEONMAP1(vld1_v, arm_neon_vld1, 0), NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0), NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0), NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0), NEONMAP0(vld1q_dup_v), NEONMAP1(vld1q_v, arm_neon_vld1, 0), NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0), NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0), NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0), NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0), NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), NEONMAP1(vld2_v, arm_neon_vld2, 0), NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0), NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), NEONMAP1(vld2q_v, arm_neon_vld2, 0), NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0), NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), NEONMAP1(vld3_v, arm_neon_vld3, 0), NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0), NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), NEONMAP1(vld3q_v, arm_neon_vld3, 0), NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0), NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), NEONMAP1(vld4_v, arm_neon_vld4, 0), NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0), NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), NEONMAP1(vld4q_v, arm_neon_vld4, 0), NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), NEONMAP1(vmmlaq_s32, arm_neon_smmla, 0), NEONMAP1(vmmlaq_u32, arm_neon_ummla, 0), NEONMAP0(vmovl_v), NEONMAP0(vmovn_v), NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), NEONMAP0(vmull_v), NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), NEONMAP2(vpadalq_v, 
arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), NEONMAP2(vqadd_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), NEONMAP2(vqaddq_v, uadd_sat, sadd_sat, Add1ArgType | UnsignedAlts), NEONMAP2(vqdmlal_v, arm_neon_vqdmull, sadd_sat, 0), NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, ssub_sat, 0), NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), NEONMAP1(vqrdmlah_s16, arm_neon_vqrdmlah, Add1ArgType), NEONMAP1(vqrdmlah_s32, arm_neon_vqrdmlah, Add1ArgType), NEONMAP1(vqrdmlahq_s16, arm_neon_vqrdmlah, Add1ArgType), NEONMAP1(vqrdmlahq_s32, arm_neon_vqrdmlah, Add1ArgType), NEONMAP1(vqrdmlsh_s16, arm_neon_vqrdmlsh, Add1ArgType), NEONMAP1(vqrdmlsh_s32, arm_neon_vqrdmlsh, Add1ArgType), NEONMAP1(vqrdmlshq_s16, arm_neon_vqrdmlsh, Add1ArgType), NEONMAP1(vqrdmlshq_s32, arm_neon_vqrdmlsh, Add1ArgType), NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), NEONMAP2(vqsub_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), NEONMAP2(vqsubq_v, usub_sat, ssub_sat, Add1ArgType | UnsignedAlts), NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), NEONMAP0(vrndi_v), NEONMAP0(vrndiq_v), NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), NEONMAP2(vrshl_v, 
arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), NEONMAP1(vsha1su0q_u32, arm_neon_sha1su0, 0), NEONMAP1(vsha1su1q_u32, arm_neon_sha1su1, 0), NEONMAP1(vsha256h2q_u32, arm_neon_sha256h2, 0), NEONMAP1(vsha256hq_u32, arm_neon_sha256h, 0), NEONMAP1(vsha256su0q_u32, arm_neon_sha256su0, 0), NEONMAP1(vsha256su1q_u32, arm_neon_sha256su1, 0), NEONMAP0(vshl_n_v), NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), NEONMAP0(vshll_n_v), NEONMAP0(vshlq_n_v), NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), NEONMAP0(vshr_n_v), NEONMAP0(vshrn_n_v), NEONMAP0(vshrq_n_v), NEONMAP1(vst1_v, arm_neon_vst1, 0), NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0), NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0), NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0), NEONMAP1(vst1q_v, arm_neon_vst1, 0), NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0), NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0), NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0), NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), NEONMAP1(vst2_v, arm_neon_vst2, 0), NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), NEONMAP1(vst2q_v, arm_neon_vst2, 0), NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), NEONMAP1(vst3_v, arm_neon_vst3, 0), NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), NEONMAP1(vst3q_v, arm_neon_vst3, 0), NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), NEONMAP1(vst4_v, arm_neon_vst4, 0), NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), NEONMAP1(vst4q_v, arm_neon_vst4, 0), NEONMAP0(vsubhn_v), NEONMAP0(vtrn_v), NEONMAP0(vtrnq_v), NEONMAP0(vtst_v), NEONMAP0(vtstq_v), NEONMAP1(vusdot_s32, arm_neon_usdot, 0), NEONMAP1(vusdotq_s32, arm_neon_usdot, 0), NEONMAP1(vusmmlaq_s32, arm_neon_usmmla, 0), NEONMAP0(vuzp_v), NEONMAP0(vuzpq_v), NEONMAP0(vzip_v), NEONMAP0(vzipq_v) }; static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = { NEONMAP1(__a64_vcvtq_low_bf16_f32, aarch64_neon_bfcvtn, 0), NEONMAP0(splat_lane_v), NEONMAP0(splat_laneq_v), NEONMAP0(splatq_lane_v), NEONMAP0(splatq_laneq_v), NEONMAP1(vabs_v, aarch64_neon_abs, 0), NEONMAP1(vabsq_v, aarch64_neon_abs, 0), NEONMAP0(vadd_v), NEONMAP0(vaddhn_v), NEONMAP0(vaddq_p128), NEONMAP0(vaddq_v), NEONMAP1(vaesdq_u8, aarch64_crypto_aesd, 0), NEONMAP1(vaeseq_u8, aarch64_crypto_aese, 0), NEONMAP1(vaesimcq_u8, aarch64_crypto_aesimc, 0), NEONMAP1(vaesmcq_u8, aarch64_crypto_aesmc, 0), NEONMAP2(vbcaxq_s16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), NEONMAP2(vbcaxq_s32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), NEONMAP2(vbcaxq_s64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), NEONMAP2(vbcaxq_s8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), NEONMAP2(vbcaxq_u16, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), NEONMAP2(vbcaxq_u32, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), NEONMAP2(vbcaxq_u64, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts), NEONMAP2(vbcaxq_u8, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, 
Add1ArgType | UnsignedAlts), NEONMAP1(vbfdot_f32, aarch64_neon_bfdot, 0), NEONMAP1(vbfdotq_f32, aarch64_neon_bfdot, 0), NEONMAP1(vbfmlalbq_f32, aarch64_neon_bfmlalb, 0), NEONMAP1(vbfmlaltq_f32, aarch64_neon_bfmlalt, 0), NEONMAP1(vbfmmlaq_f32, aarch64_neon_bfmmla, 0), NEONMAP1(vcadd_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcadd_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcadd_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcadd_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcaddq_rot270_f16, aarch64_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcaddq_rot270_f32, aarch64_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcaddq_rot270_f64, aarch64_neon_vcadd_rot270, Add1ArgType), NEONMAP1(vcaddq_rot90_f16, aarch64_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcaddq_rot90_f32, aarch64_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcaddq_rot90_f64, aarch64_neon_vcadd_rot90, Add1ArgType), NEONMAP1(vcage_v, aarch64_neon_facge, 0), NEONMAP1(vcageq_v, aarch64_neon_facge, 0), NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), NEONMAP1(vcale_v, aarch64_neon_facge, 0), NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), NEONMAP0(vceqz_v), NEONMAP0(vceqzq_v), NEONMAP0(vcgez_v), NEONMAP0(vcgezq_v), NEONMAP0(vcgtz_v), NEONMAP0(vcgtzq_v), NEONMAP0(vclez_v), NEONMAP0(vclezq_v), NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), NEONMAP0(vcltz_v), NEONMAP0(vcltzq_v), NEONMAP1(vclz_v, ctlz, Add1ArgType), NEONMAP1(vclzq_v, ctlz, Add1ArgType), NEONMAP1(vcmla_f16, aarch64_neon_vcmla_rot0, Add1ArgType), NEONMAP1(vcmla_f32, aarch64_neon_vcmla_rot0, Add1ArgType), NEONMAP1(vcmla_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), NEONMAP1(vcmla_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), NEONMAP1(vcmla_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), NEONMAP1(vcmla_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), NEONMAP1(vcmla_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), NEONMAP1(vcmla_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), NEONMAP1(vcmlaq_f16, aarch64_neon_vcmla_rot0, Add1ArgType), NEONMAP1(vcmlaq_f32, aarch64_neon_vcmla_rot0, Add1ArgType), NEONMAP1(vcmlaq_f64, aarch64_neon_vcmla_rot0, Add1ArgType), NEONMAP1(vcmlaq_rot180_f16, aarch64_neon_vcmla_rot180, Add1ArgType), NEONMAP1(vcmlaq_rot180_f32, aarch64_neon_vcmla_rot180, Add1ArgType), NEONMAP1(vcmlaq_rot180_f64, aarch64_neon_vcmla_rot180, Add1ArgType), NEONMAP1(vcmlaq_rot270_f16, aarch64_neon_vcmla_rot270, Add1ArgType), NEONMAP1(vcmlaq_rot270_f32, aarch64_neon_vcmla_rot270, Add1ArgType), NEONMAP1(vcmlaq_rot270_f64, aarch64_neon_vcmla_rot270, Add1ArgType), NEONMAP1(vcmlaq_rot90_f16, aarch64_neon_vcmla_rot90, Add1ArgType), NEONMAP1(vcmlaq_rot90_f32, aarch64_neon_vcmla_rot90, Add1ArgType), NEONMAP1(vcmlaq_rot90_f64, aarch64_neon_vcmla_rot90, Add1ArgType), NEONMAP1(vcnt_v, ctpop, Add1ArgType), NEONMAP1(vcntq_v, ctpop, Add1ArgType), NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), NEONMAP0(vcvt_f16_s16), NEONMAP0(vcvt_f16_u16), NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), NEONMAP0(vcvt_f32_v), NEONMAP1(vcvt_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), NEONMAP1(vcvt_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP1(vcvt_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), 
NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvt_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP0(vcvtq_f16_s16), NEONMAP0(vcvtq_f16_u16), NEONMAP0(vcvtq_f32_v), NEONMAP1(vcvtq_high_bf16_f32, aarch64_neon_bfcvtn2, 0), NEONMAP1(vcvtq_n_f16_s16, aarch64_neon_vcvtfxs2fp, 0), NEONMAP1(vcvtq_n_f16_u16, aarch64_neon_vcvtfxu2fp, 0), NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), NEONMAP1(vcvtq_n_s16_f16, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), NEONMAP1(vcvtq_n_u16_f16, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), NEONMAP1(vdot_s32, aarch64_neon_sdot, 0), NEONMAP1(vdot_u32, aarch64_neon_udot, 0), NEONMAP1(vdotq_s32, aarch64_neon_sdot, 0), NEONMAP1(vdotq_u32, aarch64_neon_udot, 0), NEONMAP2(veor3q_s16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), NEONMAP2(veor3q_s32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), NEONMAP2(veor3q_s64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), NEONMAP2(veor3q_s8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), NEONMAP2(veor3q_u16, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), NEONMAP2(veor3q_u32, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), NEONMAP2(veor3q_u64, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), NEONMAP2(veor3q_u8, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts), NEONMAP0(vext_v), NEONMAP0(vextq_v), NEONMAP0(vfma_v), NEONMAP0(vfmaq_v), NEONMAP1(vfmlal_high_f16, aarch64_neon_fmlal2, 0), NEONMAP1(vfmlal_low_f16, aarch64_neon_fmlal, 0), NEONMAP1(vfmlalq_high_f16, aarch64_neon_fmlal2, 0), NEONMAP1(vfmlalq_low_f16, aarch64_neon_fmlal, 0), NEONMAP1(vfmlsl_high_f16, aarch64_neon_fmlsl2, 0), NEONMAP1(vfmlsl_low_f16, aarch64_neon_fmlsl, 0), NEONMAP1(vfmlslq_high_f16, aarch64_neon_fmlsl2, 0), NEONMAP1(vfmlslq_low_f16, aarch64_neon_fmlsl, 0), NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0), NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0), NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0), NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0), NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0), NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0), NEONMAP1(vmmlaq_s32, aarch64_neon_smmla, 0), NEONMAP1(vmmlaq_u32, aarch64_neon_ummla, 0), NEONMAP0(vmovl_v), NEONMAP0(vmovn_v), NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 
NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), NEONMAP1(vqdmulh_lane_v, aarch64_neon_sqdmulh_lane, 0), NEONMAP1(vqdmulh_laneq_v, aarch64_neon_sqdmulh_laneq, 0), NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), NEONMAP1(vqdmulhq_lane_v, aarch64_neon_sqdmulh_lane, 0), NEONMAP1(vqdmulhq_laneq_v, aarch64_neon_sqdmulh_laneq, 0), NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), NEONMAP1(vqrdmlah_s16, aarch64_neon_sqrdmlah, Add1ArgType), NEONMAP1(vqrdmlah_s32, aarch64_neon_sqrdmlah, Add1ArgType), NEONMAP1(vqrdmlahq_s16, aarch64_neon_sqrdmlah, Add1ArgType), NEONMAP1(vqrdmlahq_s32, aarch64_neon_sqrdmlah, Add1ArgType), NEONMAP1(vqrdmlsh_s16, aarch64_neon_sqrdmlsh, Add1ArgType), NEONMAP1(vqrdmlsh_s32, aarch64_neon_sqrdmlsh, Add1ArgType), NEONMAP1(vqrdmlshq_s16, aarch64_neon_sqrdmlsh, Add1ArgType), NEONMAP1(vqrdmlshq_s32, aarch64_neon_sqrdmlsh, Add1ArgType), NEONMAP1(vqrdmulh_lane_v, aarch64_neon_sqrdmulh_lane, 0), NEONMAP1(vqrdmulh_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), NEONMAP1(vqrdmulhq_lane_v, aarch64_neon_sqrdmulh_lane, 0), NEONMAP1(vqrdmulhq_laneq_v, aarch64_neon_sqrdmulh_laneq, 0), NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), NEONMAP1(vrax1q_u64, aarch64_crypto_rax1, 0), NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType), NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType), NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType), NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType), NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType), NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType), 
NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType), NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType), NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType), NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType), NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType), NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType), NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType), NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType), NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType), NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType), NEONMAP0(vrndi_v), NEONMAP0(vrndiq_v), NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), NEONMAP1(vsha1su0q_u32, aarch64_crypto_sha1su0, 0), NEONMAP1(vsha1su1q_u32, aarch64_crypto_sha1su1, 0), NEONMAP1(vsha256h2q_u32, aarch64_crypto_sha256h2, 0), NEONMAP1(vsha256hq_u32, aarch64_crypto_sha256h, 0), NEONMAP1(vsha256su0q_u32, aarch64_crypto_sha256su0, 0), NEONMAP1(vsha256su1q_u32, aarch64_crypto_sha256su1, 0), NEONMAP1(vsha512h2q_u64, aarch64_crypto_sha512h2, 0), NEONMAP1(vsha512hq_u64, aarch64_crypto_sha512h, 0), NEONMAP1(vsha512su0q_u64, aarch64_crypto_sha512su0, 0), NEONMAP1(vsha512su1q_u64, aarch64_crypto_sha512su1, 0), NEONMAP0(vshl_n_v), NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), NEONMAP0(vshll_n_v), NEONMAP0(vshlq_n_v), NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), NEONMAP0(vshr_n_v), NEONMAP0(vshrn_n_v), NEONMAP0(vshrq_n_v), NEONMAP1(vsm3partw1q_u32, aarch64_crypto_sm3partw1, 0), NEONMAP1(vsm3partw2q_u32, aarch64_crypto_sm3partw2, 0), NEONMAP1(vsm3ss1q_u32, aarch64_crypto_sm3ss1, 0), NEONMAP1(vsm3tt1aq_u32, aarch64_crypto_sm3tt1a, 0), NEONMAP1(vsm3tt1bq_u32, aarch64_crypto_sm3tt1b, 0), NEONMAP1(vsm3tt2aq_u32, aarch64_crypto_sm3tt2a, 0), NEONMAP1(vsm3tt2bq_u32, aarch64_crypto_sm3tt2b, 0), NEONMAP1(vsm4ekeyq_u32, aarch64_crypto_sm4ekey, 0), NEONMAP1(vsm4eq_u32, aarch64_crypto_sm4e, 0), NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0), NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0), NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0), NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0), NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0), NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0), NEONMAP0(vsubhn_v), NEONMAP0(vtst_v), NEONMAP0(vtstq_v), NEONMAP1(vusdot_s32, aarch64_neon_usdot, 0), NEONMAP1(vusdotq_s32, aarch64_neon_usdot, 0), NEONMAP1(vusmmlaq_s32, aarch64_neon_usmmla, 0), NEONMAP1(vxarq_u64, aarch64_crypto_xar, 0), }; static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = { NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, 
AddRetType | Add1ArgType), NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0), NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, 
AddRetType | Add1ArgType), NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | 
Use64BitVectors), NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), NEONMAP1(vqrdmlahh_s16, aarch64_neon_sqrdmlah, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqrdmlahs_s32, aarch64_neon_sqrdmlah, Add1ArgType), NEONMAP1(vqrdmlshh_s16, aarch64_neon_sqrdmlsh, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqrdmlshs_s32, aarch64_neon_sqrdmlsh, Add1ArgType), NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 
NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), // FP16 scalar intrinisics go here. 
NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType), NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType), NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType), NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType), NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType), NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType), NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType), NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType), NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType), }; // Some intrinsics are equivalent for codegen. 
static const std::pair NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_splat_lane_bf16, NEON::BI__builtin_neon_splat_lane_v, }, { NEON::BI__builtin_neon_splat_laneq_bf16, NEON::BI__builtin_neon_splat_laneq_v, }, { NEON::BI__builtin_neon_splatq_lane_bf16, NEON::BI__builtin_neon_splatq_lane_v, }, { NEON::BI__builtin_neon_splatq_laneq_bf16, NEON::BI__builtin_neon_splatq_laneq_v, }, { NEON::BI__builtin_neon_vabd_f16, NEON::BI__builtin_neon_vabd_v, }, { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, }, { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, }, { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, }, { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, }, { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, }, { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, }, { NEON::BI__builtin_neon_vcagtq_f16, NEON::BI__builtin_neon_vcagtq_v, }, { NEON::BI__builtin_neon_vcale_f16, NEON::BI__builtin_neon_vcale_v, }, { NEON::BI__builtin_neon_vcaleq_f16, NEON::BI__builtin_neon_vcaleq_v, }, { NEON::BI__builtin_neon_vcalt_f16, NEON::BI__builtin_neon_vcalt_v, }, { NEON::BI__builtin_neon_vcaltq_f16, NEON::BI__builtin_neon_vcaltq_v, }, { NEON::BI__builtin_neon_vceqz_f16, NEON::BI__builtin_neon_vceqz_v, }, { NEON::BI__builtin_neon_vceqzq_f16, NEON::BI__builtin_neon_vceqzq_v, }, { NEON::BI__builtin_neon_vcgez_f16, NEON::BI__builtin_neon_vcgez_v, }, { NEON::BI__builtin_neon_vcgezq_f16, NEON::BI__builtin_neon_vcgezq_v, }, { NEON::BI__builtin_neon_vcgtz_f16, NEON::BI__builtin_neon_vcgtz_v, }, { NEON::BI__builtin_neon_vcgtzq_f16, NEON::BI__builtin_neon_vcgtzq_v, }, { NEON::BI__builtin_neon_vclez_f16, NEON::BI__builtin_neon_vclez_v, }, { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, }, { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, }, { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, }, { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, }, { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, }, { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, }, { NEON::BI__builtin_neon_vfmaq_f16, NEON::BI__builtin_neon_vfmaq_v, }, { NEON::BI__builtin_neon_vfmaq_lane_f16, NEON::BI__builtin_neon_vfmaq_lane_v, }, { NEON::BI__builtin_neon_vfmaq_laneq_f16, NEON::BI__builtin_neon_vfmaq_laneq_v, }, { NEON::BI__builtin_neon_vld1_bf16_x2, NEON::BI__builtin_neon_vld1_x2_v }, { NEON::BI__builtin_neon_vld1_bf16_x3, NEON::BI__builtin_neon_vld1_x3_v }, { NEON::BI__builtin_neon_vld1_bf16_x4, NEON::BI__builtin_neon_vld1_x4_v }, { NEON::BI__builtin_neon_vld1_bf16, NEON::BI__builtin_neon_vld1_v }, { NEON::BI__builtin_neon_vld1_dup_bf16, NEON::BI__builtin_neon_vld1_dup_v }, { NEON::BI__builtin_neon_vld1_lane_bf16, NEON::BI__builtin_neon_vld1_lane_v }, { NEON::BI__builtin_neon_vld1q_bf16_x2, NEON::BI__builtin_neon_vld1q_x2_v }, { NEON::BI__builtin_neon_vld1q_bf16_x3, NEON::BI__builtin_neon_vld1q_x3_v }, { NEON::BI__builtin_neon_vld1q_bf16_x4, NEON::BI__builtin_neon_vld1q_x4_v }, { NEON::BI__builtin_neon_vld1q_bf16, NEON::BI__builtin_neon_vld1q_v }, { NEON::BI__builtin_neon_vld1q_dup_bf16, NEON::BI__builtin_neon_vld1q_dup_v }, { NEON::BI__builtin_neon_vld1q_lane_bf16, NEON::BI__builtin_neon_vld1q_lane_v }, { NEON::BI__builtin_neon_vld2_bf16, NEON::BI__builtin_neon_vld2_v }, { NEON::BI__builtin_neon_vld2_dup_bf16, NEON::BI__builtin_neon_vld2_dup_v }, { NEON::BI__builtin_neon_vld2_lane_bf16, 
NEON::BI__builtin_neon_vld2_lane_v }, { NEON::BI__builtin_neon_vld2q_bf16, NEON::BI__builtin_neon_vld2q_v }, { NEON::BI__builtin_neon_vld2q_dup_bf16, NEON::BI__builtin_neon_vld2q_dup_v }, { NEON::BI__builtin_neon_vld2q_lane_bf16, NEON::BI__builtin_neon_vld2q_lane_v }, { NEON::BI__builtin_neon_vld3_bf16, NEON::BI__builtin_neon_vld3_v }, { NEON::BI__builtin_neon_vld3_dup_bf16, NEON::BI__builtin_neon_vld3_dup_v }, { NEON::BI__builtin_neon_vld3_lane_bf16, NEON::BI__builtin_neon_vld3_lane_v }, { NEON::BI__builtin_neon_vld3q_bf16, NEON::BI__builtin_neon_vld3q_v }, { NEON::BI__builtin_neon_vld3q_dup_bf16, NEON::BI__builtin_neon_vld3q_dup_v }, { NEON::BI__builtin_neon_vld3q_lane_bf16, NEON::BI__builtin_neon_vld3q_lane_v }, { NEON::BI__builtin_neon_vld4_bf16, NEON::BI__builtin_neon_vld4_v }, { NEON::BI__builtin_neon_vld4_dup_bf16, NEON::BI__builtin_neon_vld4_dup_v }, { NEON::BI__builtin_neon_vld4_lane_bf16, NEON::BI__builtin_neon_vld4_lane_v }, { NEON::BI__builtin_neon_vld4q_bf16, NEON::BI__builtin_neon_vld4q_v }, { NEON::BI__builtin_neon_vld4q_dup_bf16, NEON::BI__builtin_neon_vld4q_dup_v }, { NEON::BI__builtin_neon_vld4q_lane_bf16, NEON::BI__builtin_neon_vld4q_lane_v }, { NEON::BI__builtin_neon_vmax_f16, NEON::BI__builtin_neon_vmax_v, }, { NEON::BI__builtin_neon_vmaxnm_f16, NEON::BI__builtin_neon_vmaxnm_v, }, { NEON::BI__builtin_neon_vmaxnmq_f16, NEON::BI__builtin_neon_vmaxnmq_v, }, { NEON::BI__builtin_neon_vmaxq_f16, NEON::BI__builtin_neon_vmaxq_v, }, { NEON::BI__builtin_neon_vmin_f16, NEON::BI__builtin_neon_vmin_v, }, { NEON::BI__builtin_neon_vminnm_f16, NEON::BI__builtin_neon_vminnm_v, }, { NEON::BI__builtin_neon_vminnmq_f16, NEON::BI__builtin_neon_vminnmq_v, }, { NEON::BI__builtin_neon_vminq_f16, NEON::BI__builtin_neon_vminq_v, }, { NEON::BI__builtin_neon_vmulx_f16, NEON::BI__builtin_neon_vmulx_v, }, { NEON::BI__builtin_neon_vmulxq_f16, NEON::BI__builtin_neon_vmulxq_v, }, { NEON::BI__builtin_neon_vpadd_f16, NEON::BI__builtin_neon_vpadd_v, }, { NEON::BI__builtin_neon_vpaddq_f16, NEON::BI__builtin_neon_vpaddq_v, }, { NEON::BI__builtin_neon_vpmax_f16, NEON::BI__builtin_neon_vpmax_v, }, { NEON::BI__builtin_neon_vpmaxnm_f16, NEON::BI__builtin_neon_vpmaxnm_v, }, { NEON::BI__builtin_neon_vpmaxnmq_f16, NEON::BI__builtin_neon_vpmaxnmq_v, }, { NEON::BI__builtin_neon_vpmaxq_f16, NEON::BI__builtin_neon_vpmaxq_v, }, { NEON::BI__builtin_neon_vpmin_f16, NEON::BI__builtin_neon_vpmin_v, }, { NEON::BI__builtin_neon_vpminnm_f16, NEON::BI__builtin_neon_vpminnm_v, }, { NEON::BI__builtin_neon_vpminnmq_f16, NEON::BI__builtin_neon_vpminnmq_v, }, { NEON::BI__builtin_neon_vpminq_f16, NEON::BI__builtin_neon_vpminq_v, }, { NEON::BI__builtin_neon_vrecpe_f16, NEON::BI__builtin_neon_vrecpe_v, }, { NEON::BI__builtin_neon_vrecpeq_f16, NEON::BI__builtin_neon_vrecpeq_v, }, { NEON::BI__builtin_neon_vrecps_f16, NEON::BI__builtin_neon_vrecps_v, }, { NEON::BI__builtin_neon_vrecpsq_f16, NEON::BI__builtin_neon_vrecpsq_v, }, { NEON::BI__builtin_neon_vrnd_f16, NEON::BI__builtin_neon_vrnd_v, }, { NEON::BI__builtin_neon_vrnda_f16, NEON::BI__builtin_neon_vrnda_v, }, { NEON::BI__builtin_neon_vrndaq_f16, NEON::BI__builtin_neon_vrndaq_v, }, { NEON::BI__builtin_neon_vrndi_f16, NEON::BI__builtin_neon_vrndi_v, }, { NEON::BI__builtin_neon_vrndiq_f16, NEON::BI__builtin_neon_vrndiq_v, }, { NEON::BI__builtin_neon_vrndm_f16, NEON::BI__builtin_neon_vrndm_v, }, { NEON::BI__builtin_neon_vrndmq_f16, NEON::BI__builtin_neon_vrndmq_v, }, { NEON::BI__builtin_neon_vrndn_f16, NEON::BI__builtin_neon_vrndn_v, }, { NEON::BI__builtin_neon_vrndnq_f16, 
NEON::BI__builtin_neon_vrndnq_v, }, { NEON::BI__builtin_neon_vrndp_f16, NEON::BI__builtin_neon_vrndp_v, }, { NEON::BI__builtin_neon_vrndpq_f16, NEON::BI__builtin_neon_vrndpq_v, }, { NEON::BI__builtin_neon_vrndq_f16, NEON::BI__builtin_neon_vrndq_v, }, { NEON::BI__builtin_neon_vrndx_f16, NEON::BI__builtin_neon_vrndx_v, }, { NEON::BI__builtin_neon_vrndxq_f16, NEON::BI__builtin_neon_vrndxq_v, }, { NEON::BI__builtin_neon_vrsqrte_f16, NEON::BI__builtin_neon_vrsqrte_v, }, { NEON::BI__builtin_neon_vrsqrteq_f16, NEON::BI__builtin_neon_vrsqrteq_v, }, { NEON::BI__builtin_neon_vrsqrts_f16, NEON::BI__builtin_neon_vrsqrts_v, }, { NEON::BI__builtin_neon_vrsqrtsq_f16, NEON::BI__builtin_neon_vrsqrtsq_v, }, { NEON::BI__builtin_neon_vsqrt_f16, NEON::BI__builtin_neon_vsqrt_v, }, { NEON::BI__builtin_neon_vsqrtq_f16, NEON::BI__builtin_neon_vsqrtq_v, }, { NEON::BI__builtin_neon_vst1_bf16_x2, NEON::BI__builtin_neon_vst1_x2_v }, { NEON::BI__builtin_neon_vst1_bf16_x3, NEON::BI__builtin_neon_vst1_x3_v }, { NEON::BI__builtin_neon_vst1_bf16_x4, NEON::BI__builtin_neon_vst1_x4_v }, { NEON::BI__builtin_neon_vst1_bf16, NEON::BI__builtin_neon_vst1_v }, { NEON::BI__builtin_neon_vst1_lane_bf16, NEON::BI__builtin_neon_vst1_lane_v }, { NEON::BI__builtin_neon_vst1q_bf16_x2, NEON::BI__builtin_neon_vst1q_x2_v }, { NEON::BI__builtin_neon_vst1q_bf16_x3, NEON::BI__builtin_neon_vst1q_x3_v }, { NEON::BI__builtin_neon_vst1q_bf16_x4, NEON::BI__builtin_neon_vst1q_x4_v }, { NEON::BI__builtin_neon_vst1q_bf16, NEON::BI__builtin_neon_vst1q_v }, { NEON::BI__builtin_neon_vst1q_lane_bf16, NEON::BI__builtin_neon_vst1q_lane_v }, { NEON::BI__builtin_neon_vst2_bf16, NEON::BI__builtin_neon_vst2_v }, { NEON::BI__builtin_neon_vst2_lane_bf16, NEON::BI__builtin_neon_vst2_lane_v }, { NEON::BI__builtin_neon_vst2q_bf16, NEON::BI__builtin_neon_vst2q_v }, { NEON::BI__builtin_neon_vst2q_lane_bf16, NEON::BI__builtin_neon_vst2q_lane_v }, { NEON::BI__builtin_neon_vst3_bf16, NEON::BI__builtin_neon_vst3_v }, { NEON::BI__builtin_neon_vst3_lane_bf16, NEON::BI__builtin_neon_vst3_lane_v }, { NEON::BI__builtin_neon_vst3q_bf16, NEON::BI__builtin_neon_vst3q_v }, { NEON::BI__builtin_neon_vst3q_lane_bf16, NEON::BI__builtin_neon_vst3q_lane_v }, { NEON::BI__builtin_neon_vst4_bf16, NEON::BI__builtin_neon_vst4_v }, { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v }, { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v }, { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v }, // The mangling rules cause us to have one ID for each type for vldap1(q)_lane // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an // arbitrary one to be handled as tha canonical variation. 
  { NEON::BI__builtin_neon_vldap1_lane_u64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_f64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1_lane_p64, NEON::BI__builtin_neon_vldap1_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_u64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_f64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vldap1q_lane_p64, NEON::BI__builtin_neon_vldap1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_u64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_f64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1_lane_p64, NEON::BI__builtin_neon_vstl1_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_u64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_f64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
  { NEON::BI__builtin_neon_vstl1q_lane_p64, NEON::BI__builtin_neon_vstl1q_lane_s64 },
};

#undef NEONMAP0
#undef NEONMAP1
#undef NEONMAP2

#define SVEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                       \
  {                                                                          \
    #NameBase, SVE::BI__builtin_sve_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
    TypeModifier                                                             \
  }

#define SVEMAP2(NameBase, TypeModifier)                                      \
  { #NameBase, SVE::BI__builtin_sve_##NameBase, 0, 0, TypeModifier }
static const ARMVectorIntrinsicInfo AArch64SVEIntrinsicMap[] = {
#define GET_SVE_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sve_builtin_cg.inc"
#include "clang/Basic/BuiltinsAArch64NeonSVEBridge_cg.def"
#undef GET_SVE_LLVM_INTRINSIC_MAP
};

#undef SVEMAP1
#undef SVEMAP2

#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier)                       \
  {                                                                          \
    #NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \
    TypeModifier                                                             \
  }

#define SMEMAP2(NameBase, TypeModifier)                                      \
  { #NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier }
static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = {
#define GET_SME_LLVM_INTRINSIC_MAP
#include "clang/Basic/arm_sme_builtin_cg.inc"
#undef GET_SME_LLVM_INTRINSIC_MAP
};

#undef SMEMAP1
#undef SMEMAP2

static bool NEONSIMDIntrinsicsProvenSorted = false;
static bool AArch64SIMDIntrinsicsProvenSorted = false;
static bool AArch64SISDIntrinsicsProvenSorted = false;
static bool AArch64SVEIntrinsicsProvenSorted = false;
static bool AArch64SMEIntrinsicsProvenSorted = false;

static const ARMVectorIntrinsicInfo *
findARMVectorIntrinsicInMap(ArrayRef<ARMVectorIntrinsicInfo> IntrinsicMap,
                            unsigned BuiltinID, bool &MapProvenSorted) {
#ifndef NDEBUG
  if (!MapProvenSorted) {
    assert(llvm::is_sorted(IntrinsicMap));
    MapProvenSorted = true;
  }
#endif

  const ARMVectorIntrinsicInfo *Builtin =
      llvm::lower_bound(IntrinsicMap, BuiltinID);

  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
    return Builtin;

  return nullptr;
}

Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
                                                   unsigned Modifier,
                                                   llvm::Type *ArgType,
                                                   const CallExpr *E) {
  int VectorSize = 0;
  if (Modifier & Use64BitVectors)
    VectorSize = 64;
  else if (Modifier & Use128BitVectors)
    VectorSize = 128;

  // Return type.
  SmallVector<llvm::Type *, 3> Tys;
  if (Modifier & AddRetType) {
    llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
    if (Modifier & VectorizeRetType)
      Ty = llvm::FixedVectorType::get(
          Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);

    Tys.push_back(Ty);
  }

  // Arguments.
  if (Modifier & VectorizeArgTypes) {
    int Elts = VectorSize ?
        VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
    ArgType = llvm::FixedVectorType::get(ArgType, Elts);
  }

  if (Modifier & (Add1ArgType | Add2ArgTypes))
    Tys.push_back(ArgType);

  if (Modifier & Add2ArgTypes)
    Tys.push_back(ArgType);

  if (Modifier & InventFloatType)
    Tys.push_back(FloatTy);

  return CGM.getIntrinsic(IntrinsicID, Tys);
}

static Value *EmitCommonNeonSISDBuiltinExpr(
    CodeGenFunction &CGF, const ARMVectorIntrinsicInfo &SISDInfo,
    SmallVectorImpl<Value *> &Ops, const CallExpr *E) {
  unsigned BuiltinID = SISDInfo.BuiltinID;
  unsigned int Int = SISDInfo.LLVMIntrinsic;
  unsigned Modifier = SISDInfo.TypeModifier;
  const char *s = SISDInfo.NameHint;

  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vcled_s64:
  case NEON::BI__builtin_neon_vcled_u64:
  case NEON::BI__builtin_neon_vcles_f32:
  case NEON::BI__builtin_neon_vcled_f64:
  case NEON::BI__builtin_neon_vcltd_s64:
  case NEON::BI__builtin_neon_vcltd_u64:
  case NEON::BI__builtin_neon_vclts_f32:
  case NEON::BI__builtin_neon_vcltd_f64:
  case NEON::BI__builtin_neon_vcales_f32:
  case NEON::BI__builtin_neon_vcaled_f64:
  case NEON::BI__builtin_neon_vcalts_f32:
  case NEON::BI__builtin_neon_vcaltd_f64:
    // Only one direction of comparisons actually exist, cmle is actually a cmge
    // with swapped operands. The table gives us the right intrinsic but we
    // still need to do the swap.
    std::swap(Ops[0], Ops[1]);
    break;
  }

  assert(Int && "Generic code assumes a valid intrinsic");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  const Expr *Arg = E->getArg(0);
  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);

  int j = 0;
  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
       ai != ae; ++ai, ++j) {
    llvm::Type *ArgTy = ai->getType();
    if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
        ArgTy->getPrimitiveSizeInBits())
      continue;

    assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
    // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
    // it before inserting.
    Ops[j] = CGF.Builder.CreateTruncOrBitCast(
        Ops[j], cast<llvm::VectorType>(ArgTy)->getElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(PoisonValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, s);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  if (ResultType->getPrimitiveSizeInBits().getFixedValue() <
      Result->getType()->getPrimitiveSizeInBits().getFixedValue())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, s);
}

Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
    llvm::Triple::ArchType Arch) {
  // Get the last argument, which specifies the vector type.
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  std::optional<llvm::APSInt> NeonTypeConst =
      Arg->getIntegerConstantExpr(getContext());
  if (!NeonTypeConst)
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst->getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();
  const bool HasLegalHalfType = getTarget().hasLegalHalfType();
  const bool AllowBFloatArgsAndRet =
      getTargetHooks().getABIInfo().allowBFloatArgsAndRet();

  llvm::FixedVectorType *VTy =
      GetNeonType(this, Type, HasLegalHalfType, false, AllowBFloatArgsAndRet);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_splat_lane_v:
  case NEON::BI__builtin_neon_splat_laneq_v:
  case NEON::BI__builtin_neon_splatq_lane_v:
  case NEON::BI__builtin_neon_splatq_laneq_v: {
    auto NumElements = VTy->getElementCount();
    if (BuiltinID == NEON::BI__builtin_neon_splatq_lane_v)
      NumElements = NumElements * 2;
    if (BuiltinID == NEON::BI__builtin_neon_splat_laneq_v)
      NumElements = NumElements.divideCoefficientBy(2);

    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return EmitNeonSplat(Ops[0], cast<ConstantInt>(Ops[1]), NumElements);
  }
  case NEON::BI__builtin_neon_vpadd_v:
  case NEON::BI__builtin_neon_vpaddq_v:
    // We don't allow fp/int overloading of intrinsics.
    if (VTy->getElementType()->isFloatingPointTy() &&
        Int == Intrinsic::aarch64_neon_addp)
      Int = Intrinsic::aarch64_neon_faddp;
    break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vadd_v:
  case NEON::BI__builtin_neon_vaddq_v: {
    llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, Quad ?
16 : 8); Ops[0] = Builder.CreateBitCast(Ops[0], VTy); Ops[1] = Builder.CreateBitCast(Ops[1], VTy); Ops[0] = Builder.CreateXor(Ops[0], Ops[1]); return Builder.CreateBitCast(Ops[0], Ty); } case NEON::BI__builtin_neon_vaddhn_v: { llvm::FixedVectorType *SrcTy = llvm::FixedVectorType::getExtendedElementVectorType(VTy); // %sum = add <4 x i32> %lhs, %rhs Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); // %high = lshr <4 x i32> %sum, Constant *ShiftAmt = ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); // %res = trunc <4 x i32> %high to <4 x i16> return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); } case NEON::BI__builtin_neon_vcale_v: case NEON::BI__builtin_neon_vcaleq_v: case NEON::BI__builtin_neon_vcalt_v: case NEON::BI__builtin_neon_vcaltq_v: std::swap(Ops[0], Ops[1]); [[fallthrough]]; case NEON::BI__builtin_neon_vcage_v: case NEON::BI__builtin_neon_vcageq_v: case NEON::BI__builtin_neon_vcagt_v: case NEON::BI__builtin_neon_vcagtq_v: { llvm::Type *Ty; switch (VTy->getScalarSizeInBits()) { default: llvm_unreachable("unexpected type"); case 32: Ty = FloatTy; break; case 64: Ty = DoubleTy; break; case 16: Ty = HalfTy; break; } auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements()); llvm::Type *Tys[] = { VTy, VecFlt }; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); return EmitNeonCall(F, Ops, NameHint); } case NEON::BI__builtin_neon_vceqz_v: case NEON::BI__builtin_neon_vceqzq_v: return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); case NEON::BI__builtin_neon_vcgez_v: case NEON::BI__builtin_neon_vcgezq_v: return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); case NEON::BI__builtin_neon_vclez_v: case NEON::BI__builtin_neon_vclezq_v: return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); case NEON::BI__builtin_neon_vcgtz_v: case NEON::BI__builtin_neon_vcgtzq_v: return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); case NEON::BI__builtin_neon_vcltz_v: case NEON::BI__builtin_neon_vcltzq_v: return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); case NEON::BI__builtin_neon_vclz_v: case NEON::BI__builtin_neon_vclzq_v: // We generate target-independent intrinsic, which needs a second argument // for whether or not clz of zero is undefined; on ARM it isn't. Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); break; case NEON::BI__builtin_neon_vcvt_f32_v: case NEON::BI__builtin_neon_vcvtq_f32_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad), HasLegalHalfType); return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); case NEON::BI__builtin_neon_vcvt_f16_s16: case NEON::BI__builtin_neon_vcvt_f16_u16: case NEON::BI__builtin_neon_vcvtq_f16_s16: case NEON::BI__builtin_neon_vcvtq_f16_u16: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad), HasLegalHalfType); return Usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); case NEON::BI__builtin_neon_vcvt_n_f16_s16: case NEON::BI__builtin_neon_vcvt_n_f16_u16: case NEON::BI__builtin_neon_vcvtq_n_f16_s16: case NEON::BI__builtin_neon_vcvtq_n_f16_u16: { llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; Function *F = CGM.getIntrinsic(Int, Tys); return EmitNeonCall(F, Ops, "vcvt_n"); } case NEON::BI__builtin_neon_vcvt_n_f32_v: case NEON::BI__builtin_neon_vcvt_n_f64_v: case NEON::BI__builtin_neon_vcvtq_n_f32_v: case NEON::BI__builtin_neon_vcvtq_n_f64_v: { llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic; Function *F = CGM.getIntrinsic(Int, Tys); return EmitNeonCall(F, Ops, "vcvt_n"); } case NEON::BI__builtin_neon_vcvt_n_s16_f16: case NEON::BI__builtin_neon_vcvt_n_s32_v: case NEON::BI__builtin_neon_vcvt_n_u16_f16: case NEON::BI__builtin_neon_vcvt_n_u32_v: case NEON::BI__builtin_neon_vcvt_n_s64_v: case NEON::BI__builtin_neon_vcvt_n_u64_v: case NEON::BI__builtin_neon_vcvtq_n_s16_f16: case NEON::BI__builtin_neon_vcvtq_n_s32_v: case NEON::BI__builtin_neon_vcvtq_n_u16_f16: case NEON::BI__builtin_neon_vcvtq_n_u32_v: case NEON::BI__builtin_neon_vcvtq_n_s64_v: case NEON::BI__builtin_neon_vcvtq_n_u64_v: { llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); return EmitNeonCall(F, Ops, "vcvt_n"); } case NEON::BI__builtin_neon_vcvt_s32_v: case NEON::BI__builtin_neon_vcvt_u32_v: case NEON::BI__builtin_neon_vcvt_s64_v: case NEON::BI__builtin_neon_vcvt_u64_v: case NEON::BI__builtin_neon_vcvt_s16_f16: case NEON::BI__builtin_neon_vcvt_u16_f16: case NEON::BI__builtin_neon_vcvtq_s32_v: case NEON::BI__builtin_neon_vcvtq_u32_v: case NEON::BI__builtin_neon_vcvtq_s64_v: case NEON::BI__builtin_neon_vcvtq_u64_v: case NEON::BI__builtin_neon_vcvtq_s16_f16: case NEON::BI__builtin_neon_vcvtq_u16_f16: { Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); return Usgn ? 
Builder.CreateFPToUI(Ops[0], Ty, "vcvt") : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); } case NEON::BI__builtin_neon_vcvta_s16_f16: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvta_s64_v: case NEON::BI__builtin_neon_vcvta_u16_f16: case NEON::BI__builtin_neon_vcvta_u32_v: case NEON::BI__builtin_neon_vcvta_u64_v: case NEON::BI__builtin_neon_vcvtaq_s16_f16: case NEON::BI__builtin_neon_vcvtaq_s32_v: case NEON::BI__builtin_neon_vcvtaq_s64_v: case NEON::BI__builtin_neon_vcvtaq_u16_f16: case NEON::BI__builtin_neon_vcvtaq_u32_v: case NEON::BI__builtin_neon_vcvtaq_u64_v: case NEON::BI__builtin_neon_vcvtn_s16_f16: case NEON::BI__builtin_neon_vcvtn_s32_v: case NEON::BI__builtin_neon_vcvtn_s64_v: case NEON::BI__builtin_neon_vcvtn_u16_f16: case NEON::BI__builtin_neon_vcvtn_u32_v: case NEON::BI__builtin_neon_vcvtn_u64_v: case NEON::BI__builtin_neon_vcvtnq_s16_f16: case NEON::BI__builtin_neon_vcvtnq_s32_v: case NEON::BI__builtin_neon_vcvtnq_s64_v: case NEON::BI__builtin_neon_vcvtnq_u16_f16: case NEON::BI__builtin_neon_vcvtnq_u32_v: case NEON::BI__builtin_neon_vcvtnq_u64_v: case NEON::BI__builtin_neon_vcvtp_s16_f16: case NEON::BI__builtin_neon_vcvtp_s32_v: case NEON::BI__builtin_neon_vcvtp_s64_v: case NEON::BI__builtin_neon_vcvtp_u16_f16: case NEON::BI__builtin_neon_vcvtp_u32_v: case NEON::BI__builtin_neon_vcvtp_u64_v: case NEON::BI__builtin_neon_vcvtpq_s16_f16: case NEON::BI__builtin_neon_vcvtpq_s32_v: case NEON::BI__builtin_neon_vcvtpq_s64_v: case NEON::BI__builtin_neon_vcvtpq_u16_f16: case NEON::BI__builtin_neon_vcvtpq_u32_v: case NEON::BI__builtin_neon_vcvtpq_u64_v: case NEON::BI__builtin_neon_vcvtm_s16_f16: case NEON::BI__builtin_neon_vcvtm_s32_v: case NEON::BI__builtin_neon_vcvtm_s64_v: case NEON::BI__builtin_neon_vcvtm_u16_f16: case NEON::BI__builtin_neon_vcvtm_u32_v: case NEON::BI__builtin_neon_vcvtm_u64_v: case NEON::BI__builtin_neon_vcvtmq_s16_f16: case NEON::BI__builtin_neon_vcvtmq_s32_v: case NEON::BI__builtin_neon_vcvtmq_s64_v: case NEON::BI__builtin_neon_vcvtmq_u16_f16: case NEON::BI__builtin_neon_vcvtmq_u32_v: case NEON::BI__builtin_neon_vcvtmq_u64_v: { llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); } case NEON::BI__builtin_neon_vcvtx_f32_v: { llvm::Type *Tys[2] = { VTy->getTruncatedElementVectorType(VTy), Ty}; return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); } case NEON::BI__builtin_neon_vext_v: case NEON::BI__builtin_neon_vextq_v: { int CV = cast(Ops[2])->getSExtValue(); SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) Indices.push_back(i+CV); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext"); } case NEON::BI__builtin_neon_vfma_v: case NEON::BI__builtin_neon_vfmaq_v: { Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); // NEON intrinsic puts accumulator first, unlike the LLVM fma. 
return emitCallMaybeConstrainedFPBuiltin( *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, {Ops[1], Ops[2], Ops[0]}); } case NEON::BI__builtin_neon_vld1_v: case NEON::BI__builtin_neon_vld1q_v: { llvm::Type *Tys[] = {Ty, Int8PtrTy}; Ops.push_back(getAlignmentValue32(PtrOp0)); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1"); } case NEON::BI__builtin_neon_vld1_x2_v: case NEON::BI__builtin_neon_vld1q_x2_v: case NEON::BI__builtin_neon_vld1_x3_v: case NEON::BI__builtin_neon_vld1q_x3_v: case NEON::BI__builtin_neon_vld1_x4_v: case NEON::BI__builtin_neon_vld1q_x4_v: { llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld2_v: case NEON::BI__builtin_neon_vld2q_v: case NEON::BI__builtin_neon_vld3_v: case NEON::BI__builtin_neon_vld3q_v: case NEON::BI__builtin_neon_vld4_v: case NEON::BI__builtin_neon_vld4q_v: case NEON::BI__builtin_neon_vld2_dup_v: case NEON::BI__builtin_neon_vld2q_dup_v: case NEON::BI__builtin_neon_vld3_dup_v: case NEON::BI__builtin_neon_vld3q_dup_v: case NEON::BI__builtin_neon_vld4_dup_v: case NEON::BI__builtin_neon_vld4q_dup_v: { llvm::Type *Tys[] = {Ty, Int8PtrTy}; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); Value *Align = getAlignmentValue32(PtrOp1); Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: { Value *V = PoisonValue::get(Ty); PtrOp0 = PtrOp0.withElementType(VTy->getElementType()); LoadInst *Ld = Builder.CreateLoad(PtrOp0); llvm::Constant *CI = ConstantInt::get(SizeTy, 0); Ops[0] = Builder.CreateInsertElement(V, Ld, CI); return EmitNeonSplat(Ops[0], CI); } case NEON::BI__builtin_neon_vld2_lane_v: case NEON::BI__builtin_neon_vld2q_lane_v: case NEON::BI__builtin_neon_vld3_lane_v: case NEON::BI__builtin_neon_vld3q_lane_v: case NEON::BI__builtin_neon_vld4_lane_v: case NEON::BI__builtin_neon_vld4q_lane_v: { llvm::Type *Tys[] = {Ty, Int8PtrTy}; Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); for (unsigned I = 2; I < Ops.size() - 1; ++I) Ops[I] = Builder.CreateBitCast(Ops[I], Ty); Ops.push_back(getAlignmentValue32(PtrOp1)); Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), NameHint); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vmovl_v: { llvm::FixedVectorType *DTy = llvm::FixedVectorType::getTruncatedElementVectorType(VTy); Ops[0] = Builder.CreateBitCast(Ops[0], DTy); if (Usgn) return Builder.CreateZExt(Ops[0], Ty, "vmovl"); return Builder.CreateSExt(Ops[0], Ty, "vmovl"); } case NEON::BI__builtin_neon_vmovn_v: { llvm::FixedVectorType *QTy = llvm::FixedVectorType::getExtendedElementVectorType(VTy); Ops[0] = Builder.CreateBitCast(Ops[0], QTy); return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); } case NEON::BI__builtin_neon_vmull_v: // FIXME: the integer vmull operations could be emitted in terms of pure // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of // hoisting the exts outside loops. Until global ISel comes along that can // see through such movement this leads to bad CodeGen. So we need an // intrinsic for now. Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; Int = Type.isPoly() ? 
(unsigned)Intrinsic::arm_neon_vmullp : Int; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); case NEON::BI__builtin_neon_vpadal_v: case NEON::BI__builtin_neon_vpadalq_v: { // The source operand type has twice as many elements of half the size. unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); auto *NarrowTy = llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2); llvm::Type *Tys[2] = { Ty, NarrowTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); } case NEON::BI__builtin_neon_vpaddl_v: case NEON::BI__builtin_neon_vpaddlq_v: { // The source operand type has twice as many elements of half the size. unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); auto *NarrowTy = llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2); llvm::Type *Tys[2] = { Ty, NarrowTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); } case NEON::BI__builtin_neon_vqdmlal_v: case NEON::BI__builtin_neon_vqdmlsl_v: { SmallVector MulOps(Ops.begin() + 1, Ops.end()); Ops[1] = EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal"); Ops.resize(2); return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint); } case NEON::BI__builtin_neon_vqdmulhq_lane_v: case NEON::BI__builtin_neon_vqdmulh_lane_v: case NEON::BI__builtin_neon_vqrdmulhq_lane_v: case NEON::BI__builtin_neon_vqrdmulh_lane_v: { auto *RTy = cast(Ty); if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v || BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v) RTy = llvm::FixedVectorType::get(RTy->getElementType(), RTy->getNumElements() * 2); llvm::Type *Tys[2] = { RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false, /*isQuad*/ false))}; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); } case NEON::BI__builtin_neon_vqdmulhq_laneq_v: case NEON::BI__builtin_neon_vqdmulh_laneq_v: case NEON::BI__builtin_neon_vqrdmulhq_laneq_v: case NEON::BI__builtin_neon_vqrdmulh_laneq_v: { llvm::Type *Tys[2] = { Ty, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false, /*isQuad*/ true))}; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); } case NEON::BI__builtin_neon_vqshl_n_v: case NEON::BI__builtin_neon_vqshlq_n_v: return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 1, false); case NEON::BI__builtin_neon_vqshlu_n_v: case NEON::BI__builtin_neon_vqshluq_n_v: return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 1, false); case NEON::BI__builtin_neon_vrecpe_v: case NEON::BI__builtin_neon_vrecpeq_v: case NEON::BI__builtin_neon_vrsqrte_v: case NEON::BI__builtin_neon_vrsqrteq_v: Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); case NEON::BI__builtin_neon_vrndi_v: case NEON::BI__builtin_neon_vrndiq_v: Int = Builder.getIsFPConstrained() ? 
Intrinsic::experimental_constrained_nearbyint : Intrinsic::nearbyint; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint); case NEON::BI__builtin_neon_vrshr_n_v: case NEON::BI__builtin_neon_vrshrq_n_v: return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true); case NEON::BI__builtin_neon_vsha512hq_u64: case NEON::BI__builtin_neon_vsha512h2q_u64: case NEON::BI__builtin_neon_vsha512su0q_u64: case NEON::BI__builtin_neon_vsha512su1q_u64: { Function *F = CGM.getIntrinsic(Int); return EmitNeonCall(F, Ops, ""); } case NEON::BI__builtin_neon_vshl_n_v: case NEON::BI__builtin_neon_vshlq_n_v: Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false); return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n"); case NEON::BI__builtin_neon_vshll_n_v: { llvm::FixedVectorType *SrcTy = llvm::FixedVectorType::getTruncatedElementVectorType(VTy); Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); if (Usgn) Ops[0] = Builder.CreateZExt(Ops[0], VTy); else Ops[0] = Builder.CreateSExt(Ops[0], VTy); Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false); return Builder.CreateShl(Ops[0], Ops[1], "vshll_n"); } case NEON::BI__builtin_neon_vshrn_n_v: { llvm::FixedVectorType *SrcTy = llvm::FixedVectorType::getExtendedElementVectorType(VTy); Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false); if (Usgn) Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]); else Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]); return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n"); } case NEON::BI__builtin_neon_vshr_n_v: case NEON::BI__builtin_neon_vshrq_n_v: return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n"); case NEON::BI__builtin_neon_vst1_v: case NEON::BI__builtin_neon_vst1q_v: case NEON::BI__builtin_neon_vst2_v: case NEON::BI__builtin_neon_vst2q_v: case NEON::BI__builtin_neon_vst3_v: case NEON::BI__builtin_neon_vst3q_v: case NEON::BI__builtin_neon_vst4_v: case NEON::BI__builtin_neon_vst4q_v: case NEON::BI__builtin_neon_vst2_lane_v: case NEON::BI__builtin_neon_vst2q_lane_v: case NEON::BI__builtin_neon_vst3_lane_v: case NEON::BI__builtin_neon_vst3q_lane_v: case NEON::BI__builtin_neon_vst4_lane_v: case NEON::BI__builtin_neon_vst4q_lane_v: { llvm::Type *Tys[] = {Int8PtrTy, Ty}; Ops.push_back(getAlignmentValue32(PtrOp0)); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); } case NEON::BI__builtin_neon_vsm3partw1q_u32: case NEON::BI__builtin_neon_vsm3partw2q_u32: case NEON::BI__builtin_neon_vsm3ss1q_u32: case NEON::BI__builtin_neon_vsm4ekeyq_u32: case NEON::BI__builtin_neon_vsm4eq_u32: { Function *F = CGM.getIntrinsic(Int); return EmitNeonCall(F, Ops, ""); } case NEON::BI__builtin_neon_vsm3tt1aq_u32: case NEON::BI__builtin_neon_vsm3tt1bq_u32: case NEON::BI__builtin_neon_vsm3tt2aq_u32: case NEON::BI__builtin_neon_vsm3tt2bq_u32: { Function *F = CGM.getIntrinsic(Int); Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); return EmitNeonCall(F, Ops, ""); } case NEON::BI__builtin_neon_vst1_x2_v: case NEON::BI__builtin_neon_vst1q_x2_v: case NEON::BI__builtin_neon_vst1_x3_v: case NEON::BI__builtin_neon_vst1q_x3_v: case NEON::BI__builtin_neon_vst1_x4_v: case NEON::BI__builtin_neon_vst1q_x4_v: { // TODO: Currently in AArch32 mode the pointer operand comes first, whereas // in AArch64 it comes last. We may want to stick to one or another. 
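// Illustrative sketch of the two operand orders (not emitted verbatim):
//   AArch32: call void @llvm.arm.neon.vst1x2.p0.v4f32(ptr %p, <4 x float> %a, <4 x float> %b)
//   AArch64: call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %a, <4 x float> %b, ptr %p)
// hence the std::rotate below, which moves the pointer operand to the back
// when targeting AArch64.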
if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be || Arch == llvm::Triple::aarch64_32) { llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); } llvm::Type *Tys[2] = {UnqualPtrTy, VTy}; return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, ""); } case NEON::BI__builtin_neon_vsubhn_v: { llvm::FixedVectorType *SrcTy = llvm::FixedVectorType::getExtendedElementVectorType(VTy); // %sum = add <4 x i32> %lhs, %rhs Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn"); // %high = lshr <4 x i32> %sum, Constant *ShiftAmt = ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn"); // %res = trunc <4 x i32> %high to <4 x i16> return Builder.CreateTrunc(Ops[0], VTy, "vsubhn"); } case NEON::BI__builtin_neon_vtrn_v: case NEON::BI__builtin_neon_vtrnq_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back(i+vi); Indices.push_back(i+e+vi); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } case NEON::BI__builtin_neon_vtst_v: case NEON::BI__builtin_neon_vtstq_v: { Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], ConstantAggregateZero::get(Ty)); return Builder.CreateSExt(Ops[0], Ty, "vtst"); } case NEON::BI__builtin_neon_vuzp_v: case NEON::BI__builtin_neon_vuzpq_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) Indices.push_back(2*i+vi); Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } case NEON::BI__builtin_neon_vxarq_u64: { Function *F = CGM.getIntrinsic(Int); Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); return EmitNeonCall(F, Ops, ""); } case NEON::BI__builtin_neon_vzip_v: case NEON::BI__builtin_neon_vzipq_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back((i + vi*e) >> 1); Indices.push_back(((i + vi*e) >> 1)+e); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } case NEON::BI__builtin_neon_vdot_s32: case NEON::BI__builtin_neon_vdot_u32: case NEON::BI__builtin_neon_vdotq_s32: case NEON::BI__builtin_neon_vdotq_u32: { auto *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot"); } case NEON::BI__builtin_neon_vfmlal_low_f16: case 
NEON::BI__builtin_neon_vfmlalq_low_f16: { auto *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low"); } case NEON::BI__builtin_neon_vfmlsl_low_f16: case NEON::BI__builtin_neon_vfmlslq_low_f16: { auto *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low"); } case NEON::BI__builtin_neon_vfmlal_high_f16: case NEON::BI__builtin_neon_vfmlalq_high_f16: { auto *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high"); } case NEON::BI__builtin_neon_vfmlsl_high_f16: case NEON::BI__builtin_neon_vfmlslq_high_f16: { auto *InputTy = llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high"); } case NEON::BI__builtin_neon_vmmlaq_s32: case NEON::BI__builtin_neon_vmmlaq_u32: { auto *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vmmla"); } case NEON::BI__builtin_neon_vusmmlaq_s32: { auto *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla"); } case NEON::BI__builtin_neon_vusdot_s32: case NEON::BI__builtin_neon_vusdotq_s32: { auto *InputTy = llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot"); } case NEON::BI__builtin_neon_vbfdot_f32: case NEON::BI__builtin_neon_vbfdotq_f32: { llvm::Type *InputTy = llvm::FixedVectorType::get(BFloatTy, Ty->getPrimitiveSizeInBits() / 16); llvm::Type *Tys[2] = { Ty, InputTy }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vbfdot"); } case NEON::BI__builtin_neon___a32_vcvt_bf16_f32: { llvm::Type *Tys[1] = { Ty }; Function *F = CGM.getIntrinsic(Int, Tys); return EmitNeonCall(F, Ops, "vcvtfp2bf"); } } assert(Int && "Expected valid intrinsic number"); // Determine the type(s) of this overloaded AArch64 intrinsic. Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); Value *Result = EmitNeonCall(F, Ops, NameHint); llvm::Type *ResultType = ConvertType(E->getType()); // AArch64 intrinsic one-element vector type cast to // scalar type expected by the builtin return Builder.CreateBitCast(Result, ResultType, NameHint); } Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, const CmpInst::Predicate Ip, const Twine &Name) { llvm::Type *OTy = Op->getType(); // FIXME: this is utterly horrific. We should not be looking at previous // codegen context to find out what needs doing. Unfortunately TableGen // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 // (etc). 
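// A minimal sketch of the intended difference, assuming the operand reached
// us through a bitcast (illustrative IR, not emitted verbatim):
//   %cmp = fcmp oeq <2 x float> %a, zeroinitializer   ; vceqz_f32
//   %cmp = icmp eq  <2 x i32>   %a, zeroinitializer   ; vceqz_s32
//   %res = sext <2 x i1> %cmp to <2 x i32>
// which is why we peek through the bitcast below to recover the element type.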
if (BitCastInst *BI = dyn_cast(Op)) OTy = BI->getOperand(0)->getType(); Op = Builder.CreateBitCast(Op, OTy); if (OTy->getScalarType()->isFloatingPointTy()) { if (Fp == CmpInst::FCMP_OEQ) Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy)); else Op = Builder.CreateFCmpS(Fp, Op, Constant::getNullValue(OTy)); } else { Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy)); } return Builder.CreateSExt(Op, Ty, Name); } static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name) { SmallVector TblOps; if (ExtOp) TblOps.push_back(ExtOp); // Build a vector containing sequential number like (0, 1, 2, ..., 15) SmallVector Indices; auto *TblTy = cast(Ops[0]->getType()); for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) { Indices.push_back(2*i); Indices.push_back(2*i+1); } int PairPos = 0, End = Ops.size() - 1; while (PairPos < End) { TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], Ops[PairPos+1], Indices, Name)); PairPos += 2; } // If there's an odd number of 64-bit lookup table, fill the high 64-bit // of the 128-bit lookup table with zero. if (PairPos == End) { Value *ZeroTbl = ConstantAggregateZero::get(TblTy); TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos], ZeroTbl, Indices, Name)); } Function *TblF; TblOps.push_back(IndexOp); TblF = CGF.CGM.getIntrinsic(IntID, ResTy); return CGF.EmitNeonCall(TblF, TblOps, Name); } Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) { unsigned Value; switch (BuiltinID) { default: return nullptr; case clang::ARM::BI__builtin_arm_nop: Value = 0; break; case clang::ARM::BI__builtin_arm_yield: case clang::ARM::BI__yield: Value = 1; break; case clang::ARM::BI__builtin_arm_wfe: case clang::ARM::BI__wfe: Value = 2; break; case clang::ARM::BI__builtin_arm_wfi: case clang::ARM::BI__wfi: Value = 3; break; case clang::ARM::BI__builtin_arm_sev: case clang::ARM::BI__sev: Value = 4; break; case clang::ARM::BI__builtin_arm_sevl: case clang::ARM::BI__sevl: Value = 5; break; } return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint), llvm::ConstantInt::get(Int32Ty, Value)); } enum SpecialRegisterAccessKind { NormalRead, VolatileRead, Write, }; // Generates the IR for __builtin_read_exec_*. // Lowers the builtin to amdgcn_ballot intrinsic. static Value *EmitAMDGCNBallotForExec(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, bool isExecHi) { CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {RegisterType}); llvm::Value *Call = Builder.CreateCall(F, {Builder.getInt1(true)}); if (isExecHi) { Value *Rt2 = Builder.CreateLShr(Call, 32); Rt2 = Builder.CreateTrunc(Rt2, CGF.Int32Ty); return Rt2; } return Call; } // Generates the IR for the read/write special register builtin, // ValueType is the type of the value that is to be written or read, // RegisterType is the type of the register being written to or read from. static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *RegisterType, llvm::Type *ValueType, SpecialRegisterAccessKind AccessKind, StringRef SysReg = "") { // write and register intrinsics only support 32, 64 and 128 bit operations. 
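// Roughly, a read such as __builtin_arm_rsr64("foo") lowers to
//   %v = call i64 @llvm.read_volatile_register.i64(metadata !{!"foo"})
// and the write forms use @llvm.write_register with the same metadata string
// ("foo" is a placeholder; the exact overload depends on RegisterType).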
assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64) || RegisterType->isIntegerTy(128)) && "Unsupported size for register."); CodeGen::CGBuilderTy &Builder = CGF.Builder; CodeGen::CodeGenModule &CGM = CGF.CGM; LLVMContext &Context = CGM.getLLVMContext(); if (SysReg.empty()) { const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); SysReg = cast(SysRegStrExpr)->getString(); } llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); llvm::Type *Types[] = { RegisterType }; bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) && "Can't fit 64-bit value in 32-bit register"); if (AccessKind != Write) { assert(AccessKind == NormalRead || AccessKind == VolatileRead); llvm::Function *F = CGM.getIntrinsic( AccessKind == VolatileRead ? llvm::Intrinsic::read_volatile_register : llvm::Intrinsic::read_register, Types); llvm::Value *Call = Builder.CreateCall(F, Metadata); if (MixedTypes) // Read into 64 bit register and then truncate result to 32 bit. return Builder.CreateTrunc(Call, ValueType); if (ValueType->isPointerTy()) // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). return Builder.CreateIntToPtr(Call, ValueType); return Call; } llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); if (MixedTypes) { // Extend 32 bit write value to 64 bit to pass to write. ArgValue = Builder.CreateZExt(ArgValue, RegisterType); return Builder.CreateCall(F, { Metadata, ArgValue }); } if (ValueType->isPointerTy()) { // Have VoidPtrTy ArgValue but want to return an i32/i64. ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); return Builder.CreateCall(F, { Metadata, ArgValue }); } return Builder.CreateCall(F, { Metadata, ArgValue }); } /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra /// argument that specifies the vector type. 
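/// For example, __builtin_neon_vpaddl_v carries a trailing NeonTypeFlags
/// constant that selects its overload, whereas builtins such as
/// __builtin_neon_vget_lane_i8 encode the type in their name and do not.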
static bool HasExtraNeonArgument(unsigned BuiltinID) { switch (BuiltinID) { default: break; case NEON::BI__builtin_neon_vget_lane_i8: case NEON::BI__builtin_neon_vget_lane_i16: case NEON::BI__builtin_neon_vget_lane_bf16: case NEON::BI__builtin_neon_vget_lane_i32: case NEON::BI__builtin_neon_vget_lane_i64: case NEON::BI__builtin_neon_vget_lane_f32: case NEON::BI__builtin_neon_vgetq_lane_i8: case NEON::BI__builtin_neon_vgetq_lane_i16: case NEON::BI__builtin_neon_vgetq_lane_bf16: case NEON::BI__builtin_neon_vgetq_lane_i32: case NEON::BI__builtin_neon_vgetq_lane_i64: case NEON::BI__builtin_neon_vgetq_lane_f32: case NEON::BI__builtin_neon_vduph_lane_bf16: case NEON::BI__builtin_neon_vduph_laneq_bf16: case NEON::BI__builtin_neon_vset_lane_i8: case NEON::BI__builtin_neon_vset_lane_i16: case NEON::BI__builtin_neon_vset_lane_bf16: case NEON::BI__builtin_neon_vset_lane_i32: case NEON::BI__builtin_neon_vset_lane_i64: case NEON::BI__builtin_neon_vset_lane_f32: case NEON::BI__builtin_neon_vsetq_lane_i8: case NEON::BI__builtin_neon_vsetq_lane_i16: case NEON::BI__builtin_neon_vsetq_lane_bf16: case NEON::BI__builtin_neon_vsetq_lane_i32: case NEON::BI__builtin_neon_vsetq_lane_i64: case NEON::BI__builtin_neon_vsetq_lane_f32: case NEON::BI__builtin_neon_vsha1h_u32: case NEON::BI__builtin_neon_vsha1cq_u32: case NEON::BI__builtin_neon_vsha1pq_u32: case NEON::BI__builtin_neon_vsha1mq_u32: case NEON::BI__builtin_neon_vcvth_bf16_f32: case clang::ARM::BI_MoveToCoprocessor: case clang::ARM::BI_MoveToCoprocessor2: return false; } return true; } Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch) { if (auto Hint = GetValueForARMHint(BuiltinID)) return Hint; if (BuiltinID == clang::ARM::BI__emit) { bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb; llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic=*/false); Expr::EvalResult Result; if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) llvm_unreachable("Sema will ensure that the parameter is constant"); llvm::APSInt Value = Result.Val.getInt(); uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); llvm::InlineAsm *Emit = IsThumb ? 
InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", /*hasSideEffects=*/true) : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", /*hasSideEffects=*/true); return Builder.CreateCall(Emit); } if (BuiltinID == clang::ARM::BI__builtin_arm_dbg) { Value *Option = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); } if (BuiltinID == clang::ARM::BI__builtin_arm_prefetch) { Value *Address = EmitScalarExpr(E->getArg(0)); Value *RW = EmitScalarExpr(E->getArg(1)); Value *IsData = EmitScalarExpr(E->getArg(2)); // Locality is not supported on ARM target Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); return Builder.CreateCall(F, {Address, RW, Locality, IsData}); } if (BuiltinID == clang::ARM::BI__builtin_arm_rbit) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } if (BuiltinID == clang::ARM::BI__builtin_arm_clz || BuiltinID == clang::ARM::BI__builtin_arm_clz64) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType()); Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); if (BuiltinID == clang::ARM::BI__builtin_arm_clz64) Res = Builder.CreateTrunc(Res, Builder.getInt32Ty()); return Res; } if (BuiltinID == clang::ARM::BI__builtin_arm_cls) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls), Arg, "cls"); } if (BuiltinID == clang::ARM::BI__builtin_arm_cls64) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_cls64), Arg, "cls"); } if (BuiltinID == clang::ARM::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); Value *Ops[2]; for (unsigned i = 0; i < 2; i++) Ops[i] = EmitScalarExpr(E->getArg(i)); llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); llvm::FunctionType *FTy = cast(Ty); StringRef Name = FD->getName(); return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); } if (BuiltinID == clang::ARM::BI__builtin_arm_mcrr || BuiltinID == clang::ARM::BI__builtin_arm_mcrr2) { Function *F; switch (BuiltinID) { default: llvm_unreachable("unexpected builtin"); case clang::ARM::BI__builtin_arm_mcrr: F = CGM.getIntrinsic(Intrinsic::arm_mcrr); break; case clang::ARM::BI__builtin_arm_mcrr2: F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); break; } // MCRR{2} instruction has 5 operands but // the intrinsic has 4 because Rt and Rt2 // are represented as a single unsigned 64 // bit integer in the intrinsic definition // but internally it's represented as 2 32 // bit integers. 
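// Illustrative sketch of the split performed below (not emitted verbatim):
//   %lo = trunc i64 %rt_rt2 to i32
//   %hi = trunc i64 (lshr i64 %rt_rt2, 32) to i32
//   call void @llvm.arm.mcrr(i32 %cp, i32 %opc1, i32 %lo, i32 %hi, i32 %crm)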
Value *Coproc = EmitScalarExpr(E->getArg(0)); Value *Opc1 = EmitScalarExpr(E->getArg(1)); Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); Value *CRm = EmitScalarExpr(E->getArg(3)); Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); } if (BuiltinID == clang::ARM::BI__builtin_arm_mrrc || BuiltinID == clang::ARM::BI__builtin_arm_mrrc2) { Function *F; switch (BuiltinID) { default: llvm_unreachable("unexpected builtin"); case clang::ARM::BI__builtin_arm_mrrc: F = CGM.getIntrinsic(Intrinsic::arm_mrrc); break; case clang::ARM::BI__builtin_arm_mrrc2: F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); break; } Value *Coproc = EmitScalarExpr(E->getArg(0)); Value *Opc1 = EmitScalarExpr(E->getArg(1)); Value *CRm = EmitScalarExpr(E->getArg(2)); Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); // Returns an unsigned 64 bit integer, represented // as two 32 bit integers. Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); Rt = Builder.CreateZExt(Rt, Int64Ty); Rt1 = Builder.CreateZExt(Rt1, Int64Ty); Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); } if (BuiltinID == clang::ARM::BI__builtin_arm_ldrexd || ((BuiltinID == clang::ARM::BI__builtin_arm_ldrex || BuiltinID == clang::ARM::BI__builtin_arm_ldaex) && getContext().getTypeSize(E->getType()) == 64) || BuiltinID == clang::ARM::BI__ldrexd) { Function *F; switch (BuiltinID) { default: llvm_unreachable("unexpected builtin"); case clang::ARM::BI__builtin_arm_ldaex: F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); break; case clang::ARM::BI__builtin_arm_ldrexd: case clang::ARM::BI__builtin_arm_ldrex: case clang::ARM::BI__ldrexd: F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); break; } Value *LdPtr = EmitScalarExpr(E->getArg(0)); Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd"); Value *Val0 = Builder.CreateExtractValue(Val, 1); Value *Val1 = Builder.CreateExtractValue(Val, 0); Val0 = Builder.CreateZExt(Val0, Int64Ty); Val1 = Builder.CreateZExt(Val1, Int64Ty); Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); Val = Builder.CreateOr(Val, Val1); return Builder.CreateBitCast(Val, ConvertType(E->getType())); } if (BuiltinID == clang::ARM::BI__builtin_arm_ldrex || BuiltinID == clang::ARM::BI__builtin_arm_ldaex) { Value *LoadAddr = EmitScalarExpr(E->getArg(0)); QualType Ty = E->getType(); llvm::Type *RealResTy = ConvertType(Ty); llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); Function *F = CGM.getIntrinsic( BuiltinID == clang::ARM::BI__builtin_arm_ldaex ? 
Intrinsic::arm_ldaex : Intrinsic::arm_ldrex, UnqualPtrTy); CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); Val->addParamAttr( 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); if (RealResTy->isPointerTy()) return Builder.CreateIntToPtr(Val, RealResTy); else { llvm::Type *IntResTy = llvm::IntegerType::get( getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy), RealResTy); } } if (BuiltinID == clang::ARM::BI__builtin_arm_strexd || ((BuiltinID == clang::ARM::BI__builtin_arm_stlex || BuiltinID == clang::ARM::BI__builtin_arm_strex) && getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { Function *F = CGM.getIntrinsic( BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd); llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty); Address Tmp = CreateMemTemp(E->getArg(0)->getType()); Value *Val = EmitScalarExpr(E->getArg(0)); Builder.CreateStore(Val, Tmp); Address LdPtr = Tmp.withElementType(STy); Val = Builder.CreateLoad(LdPtr); Value *Arg0 = Builder.CreateExtractValue(Val, 0); Value *Arg1 = Builder.CreateExtractValue(Val, 1); Value *StPtr = EmitScalarExpr(E->getArg(1)); return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd"); } if (BuiltinID == clang::ARM::BI__builtin_arm_strex || BuiltinID == clang::ARM::BI__builtin_arm_stlex) { Value *StoreVal = EmitScalarExpr(E->getArg(0)); Value *StoreAddr = EmitScalarExpr(E->getArg(1)); QualType Ty = E->getArg(0)->getType(); llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); if (StoreVal->getType()->isPointerTy()) StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty); else { llvm::Type *IntTy = llvm::IntegerType::get( getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); StoreVal = Builder.CreateBitCast(StoreVal, IntTy); StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty); } Function *F = CGM.getIntrinsic( BuiltinID == clang::ARM::BI__builtin_arm_stlex ? Intrinsic::arm_stlex : Intrinsic::arm_strex, StoreAddr->getType()); CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex"); CI->addParamAttr( 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy)); return CI; } if (BuiltinID == clang::ARM::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex); return Builder.CreateCall(F); } // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { case clang::ARM::BI__builtin_arm_crc32b: CRCIntrinsicID = Intrinsic::arm_crc32b; break; case clang::ARM::BI__builtin_arm_crc32cb: CRCIntrinsicID = Intrinsic::arm_crc32cb; break; case clang::ARM::BI__builtin_arm_crc32h: CRCIntrinsicID = Intrinsic::arm_crc32h; break; case clang::ARM::BI__builtin_arm_crc32ch: CRCIntrinsicID = Intrinsic::arm_crc32ch; break; case clang::ARM::BI__builtin_arm_crc32w: case clang::ARM::BI__builtin_arm_crc32d: CRCIntrinsicID = Intrinsic::arm_crc32w; break; case clang::ARM::BI__builtin_arm_crc32cw: case clang::ARM::BI__builtin_arm_crc32cd: CRCIntrinsicID = Intrinsic::arm_crc32cw; break; } if (CRCIntrinsicID != Intrinsic::not_intrinsic) { Value *Arg0 = EmitScalarExpr(E->getArg(0)); Value *Arg1 = EmitScalarExpr(E->getArg(1)); // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w // intrinsics, hence we need different codegen for these cases. 
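// Illustratively, __builtin_arm_crc32d(crc, data) becomes roughly
//   crc32w(crc32w(crc, trunc(data)), trunc(data >> 32))
// as implemented below, while the 32-bit forms map onto a single call.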
if (BuiltinID == clang::ARM::BI__builtin_arm_crc32d || BuiltinID == clang::ARM::BI__builtin_arm_crc32cd) { Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); Value *Arg1b = Builder.CreateLShr(Arg1, C1); Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); Function *F = CGM.getIntrinsic(CRCIntrinsicID); Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); return Builder.CreateCall(F, {Res, Arg1b}); } else { Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); Function *F = CGM.getIntrinsic(CRCIntrinsicID); return Builder.CreateCall(F, {Arg0, Arg1}); } } if (BuiltinID == clang::ARM::BI__builtin_arm_rsr || BuiltinID == clang::ARM::BI__builtin_arm_rsr64 || BuiltinID == clang::ARM::BI__builtin_arm_rsrp || BuiltinID == clang::ARM::BI__builtin_arm_wsr || BuiltinID == clang::ARM::BI__builtin_arm_wsr64 || BuiltinID == clang::ARM::BI__builtin_arm_wsrp) { SpecialRegisterAccessKind AccessKind = Write; if (BuiltinID == clang::ARM::BI__builtin_arm_rsr || BuiltinID == clang::ARM::BI__builtin_arm_rsr64 || BuiltinID == clang::ARM::BI__builtin_arm_rsrp) AccessKind = VolatileRead; bool IsPointerBuiltin = BuiltinID == clang::ARM::BI__builtin_arm_rsrp || BuiltinID == clang::ARM::BI__builtin_arm_wsrp; bool Is64Bit = BuiltinID == clang::ARM::BI__builtin_arm_rsr64 || BuiltinID == clang::ARM::BI__builtin_arm_wsr64; llvm::Type *ValueType; llvm::Type *RegisterType; if (IsPointerBuiltin) { ValueType = VoidPtrTy; RegisterType = Int32Ty; } else if (Is64Bit) { ValueType = RegisterType = Int64Ty; } else { ValueType = RegisterType = Int32Ty; } return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, AccessKind); } if (BuiltinID == ARM::BI__builtin_sponentry) { llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy); return Builder.CreateCall(F); } // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. if (std::optional MsvcIntId = translateArmToMsvcIntrin(BuiltinID)) return EmitMSVCBuiltinExpr(*MsvcIntId, E); // Deal with MVE builtins if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) return Result; // Handle CDE builtins if (Value *Result = EmitARMCDEBuiltinExpr(BuiltinID, E, ReturnValue, Arch)) return Result; // Some intrinsics are equivalent - if they are use the base intrinsic ID. auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) { return P.first == BuiltinID; }); if (It != end(NEONEquivalentIntrinsicMap)) BuiltinID = It->second; // Find out if any arguments are required to be integer constant // expressions. unsigned ICEArguments = 0; ASTContext::GetBuiltinTypeError Error; getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); auto getAlignmentValue32 = [&](Address addr) -> Value* { return Builder.getInt32(addr.getAlignment().getQuantity()); }; Address PtrOp0 = Address::invalid(); Address PtrOp1 = Address::invalid(); SmallVector Ops; bool HasExtraArg = HasExtraNeonArgument(BuiltinID); unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 
1 : 0); for (unsigned i = 0, e = NumArgs; i != e; i++) { if (i == 0) { switch (BuiltinID) { case NEON::BI__builtin_neon_vld1_v: case NEON::BI__builtin_neon_vld1q_v: case NEON::BI__builtin_neon_vld1q_lane_v: case NEON::BI__builtin_neon_vld1_lane_v: case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: case NEON::BI__builtin_neon_vst1_v: case NEON::BI__builtin_neon_vst1q_v: case NEON::BI__builtin_neon_vst1q_lane_v: case NEON::BI__builtin_neon_vst1_lane_v: case NEON::BI__builtin_neon_vst2_v: case NEON::BI__builtin_neon_vst2q_v: case NEON::BI__builtin_neon_vst2_lane_v: case NEON::BI__builtin_neon_vst2q_lane_v: case NEON::BI__builtin_neon_vst3_v: case NEON::BI__builtin_neon_vst3q_v: case NEON::BI__builtin_neon_vst3_lane_v: case NEON::BI__builtin_neon_vst3q_lane_v: case NEON::BI__builtin_neon_vst4_v: case NEON::BI__builtin_neon_vst4q_v: case NEON::BI__builtin_neon_vst4_lane_v: case NEON::BI__builtin_neon_vst4q_lane_v: // Get the alignment for the argument in addition to the value; // we'll use it later. PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); Ops.push_back(PtrOp0.emitRawPointer(*this)); continue; } } if (i == 1) { switch (BuiltinID) { case NEON::BI__builtin_neon_vld2_v: case NEON::BI__builtin_neon_vld2q_v: case NEON::BI__builtin_neon_vld3_v: case NEON::BI__builtin_neon_vld3q_v: case NEON::BI__builtin_neon_vld4_v: case NEON::BI__builtin_neon_vld4q_v: case NEON::BI__builtin_neon_vld2_lane_v: case NEON::BI__builtin_neon_vld2q_lane_v: case NEON::BI__builtin_neon_vld3_lane_v: case NEON::BI__builtin_neon_vld3q_lane_v: case NEON::BI__builtin_neon_vld4_lane_v: case NEON::BI__builtin_neon_vld4q_lane_v: case NEON::BI__builtin_neon_vld2_dup_v: case NEON::BI__builtin_neon_vld2q_dup_v: case NEON::BI__builtin_neon_vld3_dup_v: case NEON::BI__builtin_neon_vld3q_dup_v: case NEON::BI__builtin_neon_vld4_dup_v: case NEON::BI__builtin_neon_vld4q_dup_v: // Get the alignment for the argument in addition to the value; // we'll use it later. 
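// The pointer's known alignment is later passed as the trailing i32 operand
// of the ARM vldN intrinsics, e.g. (alignment value illustrative):
//   call { <4 x i16>, <4 x i16> } @llvm.arm.neon.vld2.v4i16.p0(ptr %p, i32 8)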
PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); Ops.push_back(PtrOp1.emitRawPointer(*this)); continue; } } Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } switch (BuiltinID) { default: break; case NEON::BI__builtin_neon_vget_lane_i8: case NEON::BI__builtin_neon_vget_lane_i16: case NEON::BI__builtin_neon_vget_lane_i32: case NEON::BI__builtin_neon_vget_lane_i64: case NEON::BI__builtin_neon_vget_lane_bf16: case NEON::BI__builtin_neon_vget_lane_f32: case NEON::BI__builtin_neon_vgetq_lane_i8: case NEON::BI__builtin_neon_vgetq_lane_i16: case NEON::BI__builtin_neon_vgetq_lane_i32: case NEON::BI__builtin_neon_vgetq_lane_i64: case NEON::BI__builtin_neon_vgetq_lane_bf16: case NEON::BI__builtin_neon_vgetq_lane_f32: case NEON::BI__builtin_neon_vduph_lane_bf16: case NEON::BI__builtin_neon_vduph_laneq_bf16: return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); case NEON::BI__builtin_neon_vrndns_f32: { Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Tys[] = {Arg->getType()}; Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys); return Builder.CreateCall(F, {Arg}, "vrndn"); } case NEON::BI__builtin_neon_vset_lane_i8: case NEON::BI__builtin_neon_vset_lane_i16: case NEON::BI__builtin_neon_vset_lane_i32: case NEON::BI__builtin_neon_vset_lane_i64: case NEON::BI__builtin_neon_vset_lane_bf16: case NEON::BI__builtin_neon_vset_lane_f32: case NEON::BI__builtin_neon_vsetq_lane_i8: case NEON::BI__builtin_neon_vsetq_lane_i16: case NEON::BI__builtin_neon_vsetq_lane_i32: case NEON::BI__builtin_neon_vsetq_lane_i64: case NEON::BI__builtin_neon_vsetq_lane_bf16: case NEON::BI__builtin_neon_vsetq_lane_f32: return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); case NEON::BI__builtin_neon_vsha1h_u32: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, "vsha1h"); case NEON::BI__builtin_neon_vsha1cq_u32: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, "vsha1h"); case NEON::BI__builtin_neon_vsha1pq_u32: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, "vsha1h"); case NEON::BI__builtin_neon_vsha1mq_u32: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, "vsha1h"); case NEON::BI__builtin_neon_vcvth_bf16_f32: { return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vcvtbfp2bf), Ops, "vcvtbfp2bf"); } // The ARM _MoveToCoprocessor builtins put the input register value as // the first argument, but the LLVM intrinsic expects it as the third one. case clang::ARM::BI_MoveToCoprocessor: case clang::ARM::BI_MoveToCoprocessor2: { Function *F = CGM.getIntrinsic(BuiltinID == clang::ARM::BI_MoveToCoprocessor ? Intrinsic::arm_mcr : Intrinsic::arm_mcr2); return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], Ops[3], Ops[4], Ops[5]}); } } // Get the last argument, which specifies the vector type. assert(HasExtraArg); const Expr *Arg = E->getArg(E->getNumArgs()-1); std::optional Result = Arg->getIntegerConstantExpr(getContext()); if (!Result) return nullptr; if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f || BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_d) { // Determine the overloaded type of this builtin. llvm::Type *Ty; if (BuiltinID == clang::ARM::BI__builtin_arm_vcvtr_f) Ty = FloatTy; else Ty = DoubleTy; // Determine whether this is an unsigned conversion or not. bool usgn = Result->getZExtValue() == 1; unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; // Call the appropriate intrinsic. 
Function *F = CGM.getIntrinsic(Int, Ty); return Builder.CreateCall(F, Ops, "vcvtr"); } // Determine the type of this overloaded NEON intrinsic. NeonTypeFlags Type = Result->getZExtValue(); bool usgn = Type.isUnsigned(); bool rightShift = false; llvm::FixedVectorType *VTy = GetNeonType(this, Type, getTarget().hasLegalHalfType(), false, getTarget().hasBFloat16Type()); llvm::Type *Ty = VTy; if (!Ty) return nullptr; // Many NEON builtins have identical semantics and uses in ARM and // AArch64. Emit these in a single function. auto IntrinsicMap = ArrayRef(ARMSIMDIntrinsicMap); const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); if (Builtin) return EmitCommonNeonBuiltinExpr( Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch); unsigned Int; switch (BuiltinID) { default: return nullptr; case NEON::BI__builtin_neon_vld1q_lane_v: // Handle 64-bit integer elements as a special case. Use shuffles of // one-element vectors to avoid poor code for i64 in the backend. if (VTy->getElementType()->isIntegerTy(64)) { // Extract the other lane. Ops[1] = Builder.CreateBitCast(Ops[1], Ty); int Lane = cast(Ops[2])->getZExtValue(); Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); // Load the value as a one-element vector. Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1); llvm::Type *Tys[] = {Ty, Int8PtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); Value *Align = getAlignmentValue32(PtrOp0); Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); // Combine them. int Indices[] = {1 - Lane, Lane}; return Builder.CreateShuffleVector(Ops[1], Ld, Indices, "vld1q_lane"); } [[fallthrough]]; case NEON::BI__builtin_neon_vld1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); PtrOp0 = PtrOp0.withElementType(VTy->getElementType()); Value *Ld = Builder.CreateLoad(PtrOp0); return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); } case NEON::BI__builtin_neon_vqrshrn_n_v: Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 1, true); case NEON::BI__builtin_neon_vqrshrun_n_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), Ops, "vqrshrun_n", 1, true); case NEON::BI__builtin_neon_vqshrn_n_v: Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n", 1, true); case NEON::BI__builtin_neon_vqshrun_n_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty), Ops, "vqshrun_n", 1, true); case NEON::BI__builtin_neon_vrecpe_v: case NEON::BI__builtin_neon_vrecpeq_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty), Ops, "vrecpe"); case NEON::BI__builtin_neon_vrshrn_n_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty), Ops, "vrshrn_n", 1, true); case NEON::BI__builtin_neon_vrsra_n_v: case NEON::BI__builtin_neon_vrsraq_n_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true); Int = usgn ? 
Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts; Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]}); return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n"); case NEON::BI__builtin_neon_vsri_n_v: case NEON::BI__builtin_neon_vsriq_n_v: rightShift = true; [[fallthrough]]; case NEON::BI__builtin_neon_vsli_n_v: case NEON::BI__builtin_neon_vsliq_n_v: Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty), Ops, "vsli_n"); case NEON::BI__builtin_neon_vsra_n_v: case NEON::BI__builtin_neon_vsraq_n_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); return Builder.CreateAdd(Ops[0], Ops[1]); case NEON::BI__builtin_neon_vst1q_lane_v: // Handle 64-bit integer elements as a special case. Use a shuffle to get // a one-element vector and avoid poor code for i64 in the backend. if (VTy->getElementType()->isIntegerTy(64)) { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Value *SV = llvm::ConstantVector::get(cast(Ops[2])); Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); Ops[2] = getAlignmentValue32(PtrOp0); llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Tys), Ops); } [[fallthrough]]; case NEON::BI__builtin_neon_vst1_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); return Builder.CreateStore(Ops[1], PtrOp0.withElementType(Ops[1]->getType())); } case NEON::BI__builtin_neon_vtbl1_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), Ops, "vtbl1"); case NEON::BI__builtin_neon_vtbl2_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), Ops, "vtbl2"); case NEON::BI__builtin_neon_vtbl3_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), Ops, "vtbl3"); case NEON::BI__builtin_neon_vtbl4_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), Ops, "vtbl4"); case NEON::BI__builtin_neon_vtbx1_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), Ops, "vtbx1"); case NEON::BI__builtin_neon_vtbx2_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), Ops, "vtbx2"); case NEON::BI__builtin_neon_vtbx3_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), Ops, "vtbx3"); case NEON::BI__builtin_neon_vtbx4_v: return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), Ops, "vtbx4"); } } template static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { return E->getIntegerConstantExpr(Context)->getExtValue(); } static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned) { // Helper function called by Tablegen-constructed ARM MVE builtin codegen, // which finds it convenient to specify signed/unsigned as a boolean flag. return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T); } static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, uint32_t Shift, bool Unsigned) { // MVE helper function for integer shift right. This must handle signed vs // unsigned, and also deal specially with the case where the shift count is // equal to the lane size. In LLVM IR, an LShr with that parameter would be // undefined behavior, but in MVE it's legal, so we must convert it to code // that is not undefined in IR. 
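// Worked example, assuming 32-bit lanes: a shift right by 32 would be poison
// in IR, so an unsigned shift by 32 is folded to an all-zero vector and a
// signed shift by 32 is emitted as an arithmetic shift by 31, matching what
// the MVE instruction produces.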
unsigned LaneBits = cast(V->getType()) ->getElementType() ->getPrimitiveSizeInBits(); if (Shift == LaneBits) { // An unsigned shift of the full lane size always generates zero, so we can // simply emit a zero vector. A signed shift of the full lane size does the // same thing as shifting by one bit fewer. if (Unsigned) return llvm::Constant::getNullValue(V->getType()); else --Shift; } return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift); } static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) { // MVE-specific helper function for a vector splat, which infers the element // count of the output vector by knowing that MVE vectors are all 128 bits // wide. unsigned Elements = 128 / V->getType()->getPrimitiveSizeInBits(); return Builder.CreateVectorSplat(Elements, V); } static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder, CodeGenFunction *CGF, llvm::Value *V, llvm::Type *DestType) { // Convert one MVE vector type into another by reinterpreting its in-register // format. // // Little-endian, this is identical to a bitcast (which reinterprets the // memory format). But big-endian, they're not necessarily the same, because // the register and memory formats map to each other differently depending on // the lane size. // // We generate a bitcast whenever we can (if we're little-endian, or if the // lane sizes are the same anyway). Otherwise we fall back to an IR intrinsic // that performs the different kind of reinterpretation. if (CGF->getTarget().isBigEndian() && V->getType()->getScalarSizeInBits() != DestType->getScalarSizeInBits()) { return Builder.CreateCall( CGF->CGM.getIntrinsic(Intrinsic::arm_mve_vreinterpretq, {DestType, V->getType()}), V); } else { return Builder.CreateBitCast(V, DestType); } } static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) { // Make a shufflevector that extracts every other element of a vector (evens // or odds, as desired). SmallVector Indices; unsigned InputElements = cast(V->getType())->getNumElements(); for (unsigned i = 0; i < InputElements; i += 2) Indices.push_back(i + Odd); return Builder.CreateShuffleVector(V, Indices); } static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0, llvm::Value *V1) { // Make a shufflevector that interleaves two vectors element by element. assert(V0->getType() == V1->getType() && "Can't zip different vector types"); SmallVector Indices; unsigned InputElements = cast(V0->getType())->getNumElements(); for (unsigned i = 0; i < InputElements; i++) { Indices.push_back(i); Indices.push_back(i + InputElements); } return Builder.CreateShuffleVector(V0, V1, Indices); } template static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) { // MVE-specific helper function to make a vector splat of a constant such as // UINT_MAX or INT_MIN, in which all bits below the highest one are equal. llvm::Type *T = cast(VT)->getElementType(); unsigned LaneBits = T->getPrimitiveSizeInBits(); uint32_t Value = HighBit << (LaneBits - 1); if (OtherBits) Value |= (1UL << (LaneBits - 1)) - 1; llvm::Value *Lane = llvm::ConstantInt::get(T, Value); return ARMMVEVectorSplat(Builder, Lane); } static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder, llvm::Value *V, unsigned ReverseWidth) { // MVE-specific helper function which reverses the elements of a // vector within every (ReverseWidth)-bit collection of lanes. 
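// Example: for i8 lanes with ReverseWidth == 32, Mask is 3 and the shuffle
// indices are i ^ 3, i.e. 3,2,1,0, 7,6,5,4, ..., reversing each 32-bit group
// of lanes in place.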
SmallVector Indices; unsigned LaneSize = V->getType()->getScalarSizeInBits(); unsigned Elements = 128 / LaneSize; unsigned Mask = ReverseWidth / LaneSize - 1; for (unsigned i = 0; i < Elements; i++) Indices.push_back(i ^ Mask); return Builder.CreateShuffleVector(V, Indices); } Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch) { enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType; Intrinsic::ID IRIntr; unsigned NumVectors; // Code autogenerated by Tablegen will handle all the simple builtins. switch (BuiltinID) { #include "clang/Basic/arm_mve_builtin_cg.inc" // If we didn't match an MVE builtin id at all, go back to the // main EmitARMBuiltinExpr. default: return nullptr; } // Anything that breaks from that switch is an MVE builtin that // needs handwritten code to generate. switch (CustomCodeGenType) { case CustomCodeGen::VLD24: { llvm::SmallVector Ops; llvm::SmallVector Tys; auto MvecCType = E->getType(); auto MvecLType = ConvertType(MvecCType); assert(MvecLType->isStructTy() && "Return type for vld[24]q should be a struct"); assert(MvecLType->getStructNumElements() == 1 && "Return-type struct for vld[24]q should have one element"); auto MvecLTypeInner = MvecLType->getStructElementType(0); assert(MvecLTypeInner->isArrayTy() && "Return-type struct for vld[24]q should contain an array"); assert(MvecLTypeInner->getArrayNumElements() == NumVectors && "Array member of return-type struct vld[24]q has wrong length"); auto VecLType = MvecLTypeInner->getArrayElementType(); Tys.push_back(VecLType); auto Addr = E->getArg(0); Ops.push_back(EmitScalarExpr(Addr)); Tys.push_back(ConvertType(Addr->getType())); Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys)); Value *LoadResult = Builder.CreateCall(F, Ops); Value *MvecOut = PoisonValue::get(MvecLType); for (unsigned i = 0; i < NumVectors; ++i) { Value *Vec = Builder.CreateExtractValue(LoadResult, i); MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i}); } if (ReturnValue.isNull()) return MvecOut; else return Builder.CreateStore(MvecOut, ReturnValue.getAddress()); } case CustomCodeGen::VST24: { llvm::SmallVector Ops; llvm::SmallVector Tys; auto Addr = E->getArg(0); Ops.push_back(EmitScalarExpr(Addr)); Tys.push_back(ConvertType(Addr->getType())); auto MvecCType = E->getArg(1)->getType(); auto MvecLType = ConvertType(MvecCType); assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct"); assert(MvecLType->getStructNumElements() == 1 && "Data-type struct for vst2q should have one element"); auto MvecLTypeInner = MvecLType->getStructElementType(0); assert(MvecLTypeInner->isArrayTy() && "Data-type struct for vst2q should contain an array"); assert(MvecLTypeInner->getArrayNumElements() == NumVectors && "Array member of return-type struct vld[24]q has wrong length"); auto VecLType = MvecLTypeInner->getArrayElementType(); Tys.push_back(VecLType); AggValueSlot MvecSlot = CreateAggTemp(MvecCType); EmitAggExpr(E->getArg(1), MvecSlot); auto Mvec = Builder.CreateLoad(MvecSlot.getAddress()); for (unsigned i = 0; i < NumVectors; i++) Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i})); Function *F = CGM.getIntrinsic(IRIntr, ArrayRef(Tys)); Value *ToReturn = nullptr; for (unsigned i = 0; i < NumVectors; i++) { Ops.push_back(llvm::ConstantInt::get(Int32Ty, i)); ToReturn = Builder.CreateCall(F, Ops); Ops.pop_back(); } return ToReturn; } } llvm_unreachable("unknown custom codegen type."); } Value *CodeGenFunction::EmitARMCDEBuiltinExpr(unsigned 
BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Triple::ArchType Arch) { switch (BuiltinID) { default: return nullptr; #include "clang/Basic/arm_cde_builtin_cg.inc" } } static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl &Ops, llvm::Triple::ArchType Arch) { unsigned int Int = 0; const char *s = nullptr; switch (BuiltinID) { default: return nullptr; case NEON::BI__builtin_neon_vtbl1_v: case NEON::BI__builtin_neon_vqtbl1_v: case NEON::BI__builtin_neon_vqtbl1q_v: case NEON::BI__builtin_neon_vtbl2_v: case NEON::BI__builtin_neon_vqtbl2_v: case NEON::BI__builtin_neon_vqtbl2q_v: case NEON::BI__builtin_neon_vtbl3_v: case NEON::BI__builtin_neon_vqtbl3_v: case NEON::BI__builtin_neon_vqtbl3q_v: case NEON::BI__builtin_neon_vtbl4_v: case NEON::BI__builtin_neon_vqtbl4_v: case NEON::BI__builtin_neon_vqtbl4q_v: break; case NEON::BI__builtin_neon_vtbx1_v: case NEON::BI__builtin_neon_vqtbx1_v: case NEON::BI__builtin_neon_vqtbx1q_v: case NEON::BI__builtin_neon_vtbx2_v: case NEON::BI__builtin_neon_vqtbx2_v: case NEON::BI__builtin_neon_vqtbx2q_v: case NEON::BI__builtin_neon_vtbx3_v: case NEON::BI__builtin_neon_vqtbx3_v: case NEON::BI__builtin_neon_vqtbx3q_v: case NEON::BI__builtin_neon_vtbx4_v: case NEON::BI__builtin_neon_vqtbx4_v: case NEON::BI__builtin_neon_vqtbx4q_v: break; } assert(E->getNumArgs() >= 3); // Get the last argument, which specifies the vector type. const Expr *Arg = E->getArg(E->getNumArgs() - 1); std::optional Result = Arg->getIntegerConstantExpr(CGF.getContext()); if (!Result) return nullptr; // Determine the type of this overloaded NEON intrinsic. NeonTypeFlags Type = Result->getZExtValue(); llvm::FixedVectorType *Ty = GetNeonType(&CGF, Type); if (!Ty) return nullptr; CodeGen::CGBuilderTy &Builder = CGF.Builder; // AArch64 scalar builtins are not overloaded, they do not have an extra // argument that specifies the vector type, need to handle each case. 
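// For the 64-bit vtbl/vtbx forms handled below, packTBLDVectorList first
// concatenates pairs of D-register tables into 128-bit vectors (padding an
// odd table with zeroes), so that e.g. vtbl2 is emitted as a single
// aarch64.neon.tbl1 call on one 128-bit table.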
switch (BuiltinID) { case NEON::BI__builtin_neon_vtbl1_v: { return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 1), nullptr, Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); } case NEON::BI__builtin_neon_vtbl2_v: { return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 2), nullptr, Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); } case NEON::BI__builtin_neon_vtbl3_v: { return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 3), nullptr, Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); } case NEON::BI__builtin_neon_vtbl4_v: { return packTBLDVectorList(CGF, ArrayRef(Ops).slice(0, 4), nullptr, Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); } case NEON::BI__builtin_neon_vtbx1_v: { Value *TblRes = packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 1), nullptr, Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); llvm::Constant *EightV = ConstantInt::get(Ty, 8); Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); CmpRes = Builder.CreateSExt(CmpRes, Ty); Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); } case NEON::BI__builtin_neon_vtbx2_v: { return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 2), Ops[0], Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, "vtbx1"); } case NEON::BI__builtin_neon_vtbx3_v: { Value *TblRes = packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 3), nullptr, Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], TwentyFourV); CmpRes = Builder.CreateSExt(CmpRes, Ty); Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); } case NEON::BI__builtin_neon_vtbx4_v: { return packTBLDVectorList(CGF, ArrayRef(Ops).slice(1, 4), Ops[0], Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, "vtbx2"); } case NEON::BI__builtin_neon_vqtbl1_v: case NEON::BI__builtin_neon_vqtbl1q_v: Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; case NEON::BI__builtin_neon_vqtbl2_v: case NEON::BI__builtin_neon_vqtbl2q_v: { Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; case NEON::BI__builtin_neon_vqtbl3_v: case NEON::BI__builtin_neon_vqtbl3q_v: Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; case NEON::BI__builtin_neon_vqtbl4_v: case NEON::BI__builtin_neon_vqtbl4q_v: Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; case NEON::BI__builtin_neon_vqtbx1_v: case NEON::BI__builtin_neon_vqtbx1q_v: Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; case NEON::BI__builtin_neon_vqtbx2_v: case NEON::BI__builtin_neon_vqtbx2q_v: Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; case NEON::BI__builtin_neon_vqtbx3_v: case NEON::BI__builtin_neon_vqtbx3q_v: Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; case NEON::BI__builtin_neon_vqtbx4_v: case NEON::BI__builtin_neon_vqtbx4q_v: Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; } } if (!Int) return nullptr; Function *F = CGF.CGM.getIntrinsic(Int, Ty); return CGF.EmitNeonCall(F, Ops, s); } Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4); Op = Builder.CreateBitCast(Op, Int16Ty); Value *V = PoisonValue::get(VTy); llvm::Constant *CI = ConstantInt::get(SizeTy, 0); Op = Builder.CreateInsertElement(V, Op, CI); return Op; } /// SVEBuiltinMemEltTy - Returns the 
memory element type for this memory /// access builtin. Only required if it can't be inferred from the base pointer /// operand. llvm::Type *CodeGenFunction::SVEBuiltinMemEltTy(const SVETypeFlags &TypeFlags) { switch (TypeFlags.getMemEltType()) { case SVETypeFlags::MemEltTyDefault: return getEltType(TypeFlags); case SVETypeFlags::MemEltTyInt8: return Builder.getInt8Ty(); case SVETypeFlags::MemEltTyInt16: return Builder.getInt16Ty(); case SVETypeFlags::MemEltTyInt32: return Builder.getInt32Ty(); case SVETypeFlags::MemEltTyInt64: return Builder.getInt64Ty(); } llvm_unreachable("Unknown MemEltType"); } llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) { switch (TypeFlags.getEltType()) { default: llvm_unreachable("Invalid SVETypeFlag!"); case SVETypeFlags::EltTyInt8: return Builder.getInt8Ty(); case SVETypeFlags::EltTyInt16: return Builder.getInt16Ty(); case SVETypeFlags::EltTyInt32: return Builder.getInt32Ty(); case SVETypeFlags::EltTyInt64: return Builder.getInt64Ty(); case SVETypeFlags::EltTyInt128: return Builder.getInt128Ty(); case SVETypeFlags::EltTyFloat16: return Builder.getHalfTy(); case SVETypeFlags::EltTyFloat32: return Builder.getFloatTy(); case SVETypeFlags::EltTyFloat64: return Builder.getDoubleTy(); case SVETypeFlags::EltTyBFloat16: return Builder.getBFloatTy(); case SVETypeFlags::EltTyBool8: case SVETypeFlags::EltTyBool16: case SVETypeFlags::EltTyBool32: case SVETypeFlags::EltTyBool64: return Builder.getInt1Ty(); } } // Return the llvm predicate vector type corresponding to the specified element // TypeFlags. llvm::ScalableVectorType * CodeGenFunction::getSVEPredType(const SVETypeFlags &TypeFlags) { switch (TypeFlags.getEltType()) { default: llvm_unreachable("Unhandled SVETypeFlag!"); case SVETypeFlags::EltTyInt8: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); case SVETypeFlags::EltTyInt16: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); case SVETypeFlags::EltTyInt32: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); case SVETypeFlags::EltTyInt64: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); case SVETypeFlags::EltTyBFloat16: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); case SVETypeFlags::EltTyFloat16: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); case SVETypeFlags::EltTyFloat32: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); case SVETypeFlags::EltTyFloat64: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); case SVETypeFlags::EltTyBool8: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); case SVETypeFlags::EltTyBool16: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); case SVETypeFlags::EltTyBool32: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); case SVETypeFlags::EltTyBool64: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); } } // Return the llvm vector type corresponding to the specified element TypeFlags. 
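// For example, EltTyFloat32 maps to <vscale x 4 x float> and EltTyBool8 to
// <vscale x 16 x i1>, i.e. one 128-bit SVE granule per element type.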
llvm::ScalableVectorType * CodeGenFunction::getSVEType(const SVETypeFlags &TypeFlags) { switch (TypeFlags.getEltType()) { default: llvm_unreachable("Invalid SVETypeFlag!"); case SVETypeFlags::EltTyInt8: return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16); case SVETypeFlags::EltTyInt16: return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8); case SVETypeFlags::EltTyInt32: return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4); case SVETypeFlags::EltTyInt64: return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2); case SVETypeFlags::EltTyFloat16: return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8); case SVETypeFlags::EltTyBFloat16: return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8); case SVETypeFlags::EltTyFloat32: return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4); case SVETypeFlags::EltTyFloat64: return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2); case SVETypeFlags::EltTyBool8: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); case SVETypeFlags::EltTyBool16: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); case SVETypeFlags::EltTyBool32: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); case SVETypeFlags::EltTyBool64: return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); } } llvm::Value * CodeGenFunction::EmitSVEAllTruePred(const SVETypeFlags &TypeFlags) { Function *Ptrue = CGM.getIntrinsic(Intrinsic::aarch64_sve_ptrue, getSVEPredType(TypeFlags)); return Builder.CreateCall(Ptrue, {Builder.getInt32(/*SV_ALL*/ 31)}); } constexpr unsigned SVEBitsPerBlock = 128; static llvm::ScalableVectorType *getSVEVectorForElementType(llvm::Type *EltTy) { unsigned NumElts = SVEBitsPerBlock / EltTy->getScalarSizeInBits(); return llvm::ScalableVectorType::get(EltTy, NumElts); } // Reinterpret the input predicate so that it can be used to correctly isolate // the elements of the specified datatype. Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, llvm::ScalableVectorType *VTy) { if (isa(Pred->getType()) && cast(Pred->getType())->getName() == "aarch64.svcount") return Pred; auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy); if (Pred->getType() == RTy) return Pred; unsigned IntID; llvm::Type *IntrinsicTy; switch (VTy->getMinNumElements()) { default: llvm_unreachable("unsupported element count!"); case 1: case 2: case 4: case 8: IntID = Intrinsic::aarch64_sve_convert_from_svbool; IntrinsicTy = RTy; break; case 16: IntID = Intrinsic::aarch64_sve_convert_to_svbool; IntrinsicTy = Pred->getType(); break; } Function *F = CGM.getIntrinsic(IntID, IntrinsicTy); Value *C = Builder.CreateCall(F, Pred); assert(C->getType() == RTy && "Unexpected return type!"); return C; } Value *CodeGenFunction::EmitSVEGatherLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { auto *ResultTy = getSVEType(TypeFlags); auto *OverloadedTy = llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), ResultTy); Function *F = nullptr; if (Ops[1]->getType()->isVectorTy()) // This is the "vector base, scalar offset" case. In order to uniquely // map this built-in to an LLVM IR intrinsic, we need both the return type // and the type of the vector base. F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[1]->getType()}); else // This is the "scalar base, vector offset case". The type of the offset // is encoded in the name of the intrinsic. We only need to specify the // return type in order to uniquely map this built-in to an LLVM IR // intrinsic. 
    F = CGM.getIntrinsic(IntID, OverloadedTy);

  // At the ACLE level there's only one predicate type, svbool_t, which is
  // mapped to <vscale x 16 x i1>. However, this might be incompatible with the
  // actual type being loaded. For example, when loading doubles (i64) the
  // predicate should be <vscale x 2 x i1> instead. At the IR level the type of
  // the predicate and the data being loaded must match. Cast to the type
  // expected by the intrinsic. The intrinsic itself should be defined in
  // a way that enforces relations between parameter types.
  Ops[0] = EmitSVEPredicateCast(
      Ops[0], cast<llvm::ScalableVectorType>(F->getArg(0)->getType()));

  // Pass 0 when the offset is missing. This can only be applied when using
  // the "vector base" addressing mode for which ACLE allows no offset. The
  // corresponding LLVM IR always requires an offset.
  if (Ops.size() == 2) {
    assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
    Ops.push_back(ConstantInt::get(Int64Ty, 0));
  }

  // For "vector base, scalar index" scale the index so that it becomes a
  // scalar offset.
  if (!TypeFlags.isByteIndexed() && Ops[1]->getType()->isVectorTy()) {
    unsigned BytesPerElt =
        OverloadedTy->getElementType()->getScalarSizeInBits() / 8;
    Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt));
  }

  Value *Call = Builder.CreateCall(F, Ops);

  // The following sext/zext is only needed when ResultTy != OverloadedTy. In
  // other cases it's folded into a nop.
  return TypeFlags.isZExtReturn() ? Builder.CreateZExt(Call, ResultTy)
                                  : Builder.CreateSExt(Call, ResultTy);
}

Value *CodeGenFunction::EmitSVEScatterStore(const SVETypeFlags &TypeFlags,
                                            SmallVectorImpl<Value *> &Ops,
                                            unsigned IntID) {
  auto *SrcDataTy = getSVEType(TypeFlags);
  auto *OverloadedTy =
      llvm::ScalableVectorType::get(SVEBuiltinMemEltTy(TypeFlags), SrcDataTy);

  // In ACLE the source data is passed in the last argument, whereas in LLVM IR
  // it's the first argument. Move it accordingly.
  Ops.insert(Ops.begin(), Ops.pop_back_val());

  Function *F = nullptr;
  if (Ops[2]->getType()->isVectorTy())
    // This is the "vector base, scalar offset" case. In order to uniquely
    // map this built-in to an LLVM IR intrinsic, we need both the return type
    // and the type of the vector base.
    F = CGM.getIntrinsic(IntID, {OverloadedTy, Ops[2]->getType()});
  else
    // This is the "scalar base, vector offset case". The type of the offset
    // is encoded in the name of the intrinsic. We only need to specify the
    // return type in order to uniquely map this built-in to an LLVM IR
    // intrinsic.
    F = CGM.getIntrinsic(IntID, OverloadedTy);

  // Pass 0 when the offset is missing. This can only be applied when using
  // the "vector base" addressing mode for which ACLE allows no offset. The
  // corresponding LLVM IR always requires an offset.
  if (Ops.size() == 3) {
    assert(Ops[1]->getType()->isVectorTy() && "Scalar base requires an offset");
    Ops.push_back(ConstantInt::get(Int64Ty, 0));
  }

  // Truncation is needed when SrcDataTy != OverloadedTy. In other cases it's
  // folded into a nop.
  Ops[0] = Builder.CreateTrunc(Ops[0], OverloadedTy);

  // At the ACLE level there's only one predicate type, svbool_t, which is
  // mapped to <vscale x 16 x i1>. However, this might be incompatible with the
  // actual type being stored. For example, when storing doubles (i64) the
  // predicate should be <vscale x 2 x i1> instead. At the IR level the type of
  // the predicate and the data being stored must match. Cast to the type
  // expected by the intrinsic. The intrinsic itself should be defined in
  // a way that enforces relations between parameter types.
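// A standalone sketch of the lane-count arithmetic behind this predicate
// cast, assuming only the architectural 128-bit SVE granule; the helper name
// is illustrative and not part of this file.
constexpr unsigned SVEPredicateLanes(unsigned EltBits) {
  // svbool_t models one predicate lane per byte of the granule (16 lanes per
  // 128 bits); data of width EltBits needs 128 / EltBits lanes.
  return 128u / EltBits;
}
static_assert(SVEPredicateLanes(8) == 16, "svbool_t view of the granule");
static_assert(SVEPredicateLanes(64) == 2, "predicate lanes for doubles");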
Ops[1] = EmitSVEPredicateCast( Ops[1], cast(F->getArg(1)->getType())); // For "vector base, scalar index" scale the index so that it becomes a // scalar offset. if (!TypeFlags.isByteIndexed() && Ops[2]->getType()->isVectorTy()) { unsigned BytesPerElt = OverloadedTy->getElementType()->getScalarSizeInBits() / 8; Ops[3] = Builder.CreateShl(Ops[3], Log2_32(BytesPerElt)); } return Builder.CreateCall(F, Ops); } Value *CodeGenFunction::EmitSVEGatherPrefetch(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { // The gather prefetches are overloaded on the vector input - this can either // be the vector of base addresses or vector of offsets. auto *OverloadedTy = dyn_cast(Ops[1]->getType()); if (!OverloadedTy) OverloadedTy = cast(Ops[2]->getType()); // Cast the predicate from svbool_t to the right number of elements. Ops[0] = EmitSVEPredicateCast(Ops[0], OverloadedTy); // vector + imm addressing modes if (Ops[1]->getType()->isVectorTy()) { if (Ops.size() == 3) { // Pass 0 for 'vector+imm' when the index is omitted. Ops.push_back(ConstantInt::get(Int64Ty, 0)); // The sv_prfop is the last operand in the builtin and IR intrinsic. std::swap(Ops[2], Ops[3]); } else { // Index needs to be passed as scaled offset. llvm::Type *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); unsigned BytesPerElt = MemEltTy->getPrimitiveSizeInBits() / 8; if (BytesPerElt > 1) Ops[2] = Builder.CreateShl(Ops[2], Log2_32(BytesPerElt)); } } Function *F = CGM.getIntrinsic(IntID, OverloadedTy); return Builder.CreateCall(F, Ops); } Value *CodeGenFunction::EmitSVEStructLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); unsigned N; switch (IntID) { case Intrinsic::aarch64_sve_ld2_sret: case Intrinsic::aarch64_sve_ld1_pn_x2: case Intrinsic::aarch64_sve_ldnt1_pn_x2: case Intrinsic::aarch64_sve_ld2q_sret: N = 2; break; case Intrinsic::aarch64_sve_ld3_sret: case Intrinsic::aarch64_sve_ld3q_sret: N = 3; break; case Intrinsic::aarch64_sve_ld4_sret: case Intrinsic::aarch64_sve_ld1_pn_x4: case Intrinsic::aarch64_sve_ldnt1_pn_x4: case Intrinsic::aarch64_sve_ld4q_sret: N = 4; break; default: llvm_unreachable("unknown intrinsic!"); } auto RetTy = llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N); Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); Value *BasePtr = Ops[1]; // Does the load have an offset? 
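// A standalone sketch of the addressing performed when an offset is present:
// a GEP over the vector type advances the base pointer in whole-vector
// units, just like C++ pointer arithmetic over a block type. The fixed
// 16-byte block stands in for one 128-bit granule; for scalable vectors the
// step is a vscale multiple of this. Names are illustrative only.
struct VectorBlock { unsigned char Bytes[16]; };
inline const VectorBlock *advanceByVectors(const VectorBlock *Base,
                                           long Index) {
  return Base + Index; // scaled by sizeof(VectorBlock), like the GEP below
}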
if (Ops.size() > 2) BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]); Function *F = CGM.getIntrinsic(IntID, {VTy}); Value *Call = Builder.CreateCall(F, {Predicate, BasePtr}); unsigned MinElts = VTy->getMinNumElements(); Value *Ret = llvm::PoisonValue::get(RetTy); for (unsigned I = 0; I < N; I++) { Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); Value *SRet = Builder.CreateExtractValue(Call, I); Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx); } return Ret; } Value *CodeGenFunction::EmitSVEStructStore(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { llvm::ScalableVectorType *VTy = getSVEType(TypeFlags); unsigned N; switch (IntID) { case Intrinsic::aarch64_sve_st2: case Intrinsic::aarch64_sve_st1_pn_x2: case Intrinsic::aarch64_sve_stnt1_pn_x2: case Intrinsic::aarch64_sve_st2q: N = 2; break; case Intrinsic::aarch64_sve_st3: case Intrinsic::aarch64_sve_st3q: N = 3; break; case Intrinsic::aarch64_sve_st4: case Intrinsic::aarch64_sve_st1_pn_x4: case Intrinsic::aarch64_sve_stnt1_pn_x4: case Intrinsic::aarch64_sve_st4q: N = 4; break; default: llvm_unreachable("unknown intrinsic!"); } Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); Value *BasePtr = Ops[1]; // Does the store have an offset? if (Ops.size() > (2 + N)) BasePtr = Builder.CreateGEP(VTy, BasePtr, Ops[2]); // The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we // need to break up the tuple vector. SmallVector Operands; for (unsigned I = Ops.size() - N; I < Ops.size(); ++I) Operands.push_back(Ops[I]); Operands.append({Predicate, BasePtr}); Function *F = CGM.getIntrinsic(IntID, { VTy }); return Builder.CreateCall(F, Operands); } // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and // svpmullt_pair intrinsics, with the exception that their results are bitcast // to a wider type. Value *CodeGenFunction::EmitSVEPMull(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID) { // Splat scalar operand to vector (intrinsics with _n infix) if (TypeFlags.hasSplatOperand()) { unsigned OpNo = TypeFlags.getSplatOperand(); Ops[OpNo] = EmitSVEDupX(Ops[OpNo]); } // The pair-wise function has a narrower overloaded type. Function *F = CGM.getIntrinsic(BuiltinID, Ops[0]->getType()); Value *Call = Builder.CreateCall(F, {Ops[0], Ops[1]}); // Now bitcast to the wider result type. llvm::ScalableVectorType *Ty = getSVEType(TypeFlags); return EmitSVEReinterpret(Call, Ty); } Value *CodeGenFunction::EmitSVEMovl(const SVETypeFlags &TypeFlags, ArrayRef Ops, unsigned BuiltinID) { llvm::Type *OverloadedTy = getSVEType(TypeFlags); Function *F = CGM.getIntrinsic(BuiltinID, OverloadedTy); return Builder.CreateCall(F, {Ops[0], Builder.getInt32(0)}); } Value *CodeGenFunction::EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID) { auto *MemEltTy = SVEBuiltinMemEltTy(TypeFlags); auto *VectorTy = getSVEVectorForElementType(MemEltTy); auto *MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); Value *BasePtr = Ops[1]; // Implement the index operand if not omitted. 
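// A standalone, fixed-width sketch of how the ldN parts handled above are
// packed into one wide result: part I is inserted at offset I * MinElts.
// The std::array types (assuming <array> is available) stand in for scalable
// vectors; the same packing reappears in FormSVEBuiltinResult.
template <typename T, size_t MinElts, size_t N>
std::array<T, MinElts * N>
concatParts(const std::array<std::array<T, MinElts>, N> &Parts) {
  std::array<T, MinElts * N> Wide{};
  for (size_t I = 0; I < N; ++I)
    for (size_t J = 0; J < MinElts; ++J)
      Wide[I * MinElts + J] = Parts[I][J]; // insert part I at I * MinElts
  return Wide;
}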
if (Ops.size() > 3) BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); Value *PrfOp = Ops.back(); Function *F = CGM.getIntrinsic(BuiltinID, Predicate->getType()); return Builder.CreateCall(F, {Predicate, BasePtr, PrfOp}); } Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E, llvm::Type *ReturnTy, SmallVectorImpl &Ops, unsigned IntrinsicID, bool IsZExtReturn) { QualType LangPTy = E->getArg(1)->getType(); llvm::Type *MemEltTy = CGM.getTypes().ConvertType( LangPTy->castAs()->getPointeeType()); // The vector type that is returned may be different from the // eventual type loaded from memory. auto VectorTy = cast(ReturnTy); llvm::ScalableVectorType *MemoryTy = nullptr; llvm::ScalableVectorType *PredTy = nullptr; bool IsQuadLoad = false; switch (IntrinsicID) { case Intrinsic::aarch64_sve_ld1uwq: case Intrinsic::aarch64_sve_ld1udq: MemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1); PredTy = llvm::ScalableVectorType::get( llvm::Type::getInt1Ty(getLLVMContext()), 1); IsQuadLoad = true; break; default: MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); PredTy = MemoryTy; break; } Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy); Value *BasePtr = Ops[1]; // Does the load have an offset? if (Ops.size() > 2) BasePtr = Builder.CreateGEP(MemoryTy, BasePtr, Ops[2]); Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadLoad ? VectorTy : MemoryTy); auto *Load = cast(Builder.CreateCall(F, {Predicate, BasePtr})); auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); CGM.DecorateInstructionWithTBAA(Load, TBAAInfo); if (IsQuadLoad) return Load; return IsZExtReturn ? Builder.CreateZExt(Load, VectorTy) : Builder.CreateSExt(Load, VectorTy); } Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, SmallVectorImpl &Ops, unsigned IntrinsicID) { QualType LangPTy = E->getArg(1)->getType(); llvm::Type *MemEltTy = CGM.getTypes().ConvertType( LangPTy->castAs()->getPointeeType()); // The vector type that is stored may be different from the // eventual type stored to memory. auto VectorTy = cast(Ops.back()->getType()); auto MemoryTy = llvm::ScalableVectorType::get(MemEltTy, VectorTy); auto PredTy = MemoryTy; auto AddrMemoryTy = MemoryTy; bool IsQuadStore = false; switch (IntrinsicID) { case Intrinsic::aarch64_sve_st1wq: case Intrinsic::aarch64_sve_st1dq: AddrMemoryTy = llvm::ScalableVectorType::get(MemEltTy, 1); PredTy = llvm::ScalableVectorType::get(IntegerType::get(getLLVMContext(), 1), 1); IsQuadStore = true; break; default: break; } Value *Predicate = EmitSVEPredicateCast(Ops[0], PredTy); Value *BasePtr = Ops[1]; // Does the store have an offset? if (Ops.size() == 4) BasePtr = Builder.CreateGEP(AddrMemoryTy, BasePtr, Ops[2]); // Last value is always the data Value *Val = IsQuadStore ? Ops.back() : Builder.CreateTrunc(Ops.back(), MemoryTy); Function *F = CGM.getIntrinsic(IntrinsicID, IsQuadStore ? VectorTy : MemoryTy); auto *Store = cast(Builder.CreateCall(F, {Val, Predicate, BasePtr})); auto TBAAInfo = CGM.getTBAAAccessInfo(LangPTy->getPointeeType()); CGM.DecorateInstructionWithTBAA(Store, TBAAInfo); return Store; } Value *CodeGenFunction::EmitSMELd1St1(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { Ops[2] = EmitSVEPredicateCast( Ops[2], getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags))); SmallVector NewOps; NewOps.push_back(Ops[2]); llvm::Value *BasePtr = Ops[3]; // If the intrinsic contains the vnum parameter, multiply it with the vector // size in bytes. 
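// A standalone sketch of the vnum addressing computed below: the byte offset
// is vnum times the streaming vector length in bytes (the value the cntsb
// intrinsic yields at run time). Names and the example SVL are illustrative
// only.
inline unsigned char *vnumAddress(unsigned char *Base, long Vnum,
                                  unsigned long SVLBytes) {
  return Base + Vnum * (long)SVLBytes; // e.g. SVLBytes == 64 for a 512-bit SVL
}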
if (Ops.size() == 5) { Function *StreamingVectorLength = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); llvm::Value *StreamingVectorLengthCall = Builder.CreateCall(StreamingVectorLength); llvm::Value *Mulvl = Builder.CreateMul(StreamingVectorLengthCall, Ops[4], "mulvl"); // The type of the ptr parameter is void *, so use Int8Ty here. BasePtr = Builder.CreateGEP(Int8Ty, Ops[3], Mulvl); } NewOps.push_back(BasePtr); NewOps.push_back(Ops[0]); NewOps.push_back(Ops[1]); Function *F = CGM.getIntrinsic(IntID); return Builder.CreateCall(F, NewOps); } Value *CodeGenFunction::EmitSMEReadWrite(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { auto *VecTy = getSVEType(TypeFlags); Function *F = CGM.getIntrinsic(IntID, VecTy); if (TypeFlags.isReadZA()) Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy); else if (TypeFlags.isWriteZA()) Ops[2] = EmitSVEPredicateCast(Ops[2], VecTy); return Builder.CreateCall(F, Ops); } Value *CodeGenFunction::EmitSMEZero(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { // svzero_za() intrinsic zeros the entire za tile and has no paramters. if (Ops.size() == 0) Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255)); Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } Value *CodeGenFunction::EmitSMELdrStr(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { if (Ops.size() == 2) Ops.push_back(Builder.getInt32(0)); else Ops[2] = Builder.CreateIntCast(Ops[2], Int32Ty, true); Function *F = CGM.getIntrinsic(IntID, {}); return Builder.CreateCall(F, Ops); } // Limit the usage of scalable llvm IR generated by the ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { return Builder.CreateVectorSplat( cast(Ty)->getElementCount(), Scalar); } Value *CodeGenFunction::EmitSVEDupX(Value* Scalar) { return EmitSVEDupX(Scalar, getSVEVectorForElementType(Scalar->getType())); } Value *CodeGenFunction::EmitSVEReinterpret(Value *Val, llvm::Type *Ty) { // FIXME: For big endian this needs an additional REV, or needs a separate // intrinsic that is code-generated as a no-op, because the LLVM bitcast // instruction is defined as 'bitwise' equivalent from memory point of // view (when storing/reloading), whereas the svreinterpret builtin // implements bitwise equivalent cast from register point of view. // LLVM CodeGen for a bitcast must add an explicit REV for big-endian. 
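// A standalone analogy for the register-view reinterpret emitted below,
// assuming C++20 <bit> and <cstdint> are available: the bit pattern is copied
// unchanged and no value conversion takes place.
inline std::uint64_t bitsOfDouble(double D) {
  return std::bit_cast<std::uint64_t>(D); // same bits, different type
}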
return Builder.CreateBitCast(Val, Ty); } static void InsertExplicitZeroOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl &Ops) { auto *SplatZero = Constant::getNullValue(Ty); Ops.insert(Ops.begin(), SplatZero); } static void InsertExplicitUndefOperand(CGBuilderTy &Builder, llvm::Type *Ty, SmallVectorImpl &Ops) { auto *SplatUndef = UndefValue::get(Ty); Ops.insert(Ops.begin(), SplatUndef); } SmallVector CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *ResultType, ArrayRef Ops) { if (TypeFlags.isOverloadNone()) return {}; llvm::Type *DefaultType = getSVEType(TypeFlags); if (TypeFlags.isOverloadWhileOrMultiVecCvt()) return {DefaultType, Ops[1]->getType()}; if (TypeFlags.isOverloadWhileRW()) return {getSVEPredType(TypeFlags), Ops[0]->getType()}; if (TypeFlags.isOverloadCvt()) return {Ops[0]->getType(), Ops.back()->getType()}; if (TypeFlags.isReductionQV() && !ResultType->isScalableTy() && ResultType->isVectorTy()) return {ResultType, Ops[1]->getType()}; assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads"); return {DefaultType}; } Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, llvm::Type *Ty, ArrayRef Ops) { assert((TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) && "Expects TypleFlags.isTupleSet() or TypeFlags.isTupleGet()"); unsigned I = cast(Ops[1])->getSExtValue(); auto *SingleVecTy = dyn_cast( TypeFlags.isTupleSet() ? Ops[2]->getType() : Ty); if (!SingleVecTy) return nullptr; Value *Idx = ConstantInt::get(CGM.Int64Ty, I * SingleVecTy->getMinNumElements()); if (TypeFlags.isTupleSet()) return Builder.CreateInsertVector(Ty, Ops[0], Ops[2], Idx); return Builder.CreateExtractVector(Ty, Ops[0], Idx); } Value *CodeGenFunction::EmitSVETupleCreate(const SVETypeFlags &TypeFlags, llvm::Type *Ty, ArrayRef Ops) { assert(TypeFlags.isTupleCreate() && "Expects TypleFlag isTupleCreate"); auto *SrcTy = dyn_cast(Ops[0]->getType()); if (!SrcTy) return nullptr; unsigned MinElts = SrcTy->getMinNumElements(); Value *Call = llvm::PoisonValue::get(Ty); for (unsigned I = 0; I < Ops.size(); I++) { Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); Call = Builder.CreateInsertVector(Ty, Call, Ops[I], Idx); } return Call; } Value *CodeGenFunction::FormSVEBuiltinResult(Value *Call) { // Multi-vector results should be broken up into a single (wide) result // vector. auto *StructTy = dyn_cast(Call->getType()); if (!StructTy) return Call; auto *VTy = dyn_cast(StructTy->getTypeAtIndex(0U)); if (!VTy) return Call; unsigned N = StructTy->getNumElements(); // We may need to emit a cast to a svbool_t bool IsPredTy = VTy->getElementType()->isIntegerTy(1); unsigned MinElts = IsPredTy ? 16 : VTy->getMinNumElements(); ScalableVectorType *WideVTy = ScalableVectorType::get(VTy->getElementType(), MinElts * N); Value *Ret = llvm::PoisonValue::get(WideVTy); for (unsigned I = 0; I < N; ++I) { Value *SRet = Builder.CreateExtractValue(Call, I); assert(SRet->getType() == VTy && "Unexpected type for result value"); Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); if (IsPredTy) SRet = EmitSVEPredicateCast( SRet, ScalableVectorType::get(Builder.getInt1Ty(), 16)); Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx); } Call = Ret; return Call; } void CodeGenFunction::GetAArch64SVEProcessedOperands( unsigned BuiltinID, const CallExpr *E, SmallVectorImpl &Ops, SVETypeFlags TypeFlags) { // Find out if any arguments are required to be integer constant expressions. 
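// A standalone sketch of the bitmask test used below: GetBuiltinType encodes
// "argument i must be an integer constant expression" as bit i of the mask.
// The helper name is illustrative only.
inline bool argMustBeICE(unsigned Mask, unsigned ArgIdx) {
  return (Mask >> ArgIdx) & 1u; // equivalent to Mask & (1u << ArgIdx)
}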
unsigned ICEArguments = 0; ASTContext::GetBuiltinTypeError Error; getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); // Tuple set/get only requires one insert/extract vector, which is // created by EmitSVETupleSetOrGet. bool IsTupleGetOrSet = TypeFlags.isTupleSet() || TypeFlags.isTupleGet(); for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { bool IsICE = ICEArguments & (1 << i); Value *Arg = EmitScalarExpr(E->getArg(i)); if (IsICE) { // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. std::optional Result = E->getArg(i)->getIntegerConstantExpr(getContext()); assert(Result && "Expected argument to be a constant"); // Immediates for SVE llvm intrinsics are always 32bit. We can safely // truncate because the immediate has been range checked and no valid // immediate requires more than a handful of bits. *Result = Result->extOrTrunc(32); Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result)); continue; } if (IsTupleGetOrSet || !isa(Arg->getType())) { Ops.push_back(Arg); continue; } auto *VTy = cast(Arg->getType()); unsigned MinElts = VTy->getMinNumElements(); bool IsPred = VTy->getElementType()->isIntegerTy(1); unsigned N = (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128); if (N == 1) { Ops.push_back(Arg); continue; } for (unsigned I = 0; I < N; ++I) { Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N); auto *NewVTy = ScalableVectorType::get(VTy->getElementType(), MinElts / N); Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx)); } } } Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { llvm::Type *Ty = ConvertType(E->getType()); if (BuiltinID >= SVE::BI__builtin_sve_reinterpret_s8_s8 && BuiltinID <= SVE::BI__builtin_sve_reinterpret_f64_f64_x4) { Value *Val = EmitScalarExpr(E->getArg(0)); return EmitSVEReinterpret(Val, Ty); } auto *Builtin = findARMVectorIntrinsicInMap(AArch64SVEIntrinsicMap, BuiltinID, AArch64SVEIntrinsicsProvenSorted); llvm::SmallVector Ops; SVETypeFlags TypeFlags(Builtin->TypeModifier); GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags); if (TypeFlags.isLoad()) return EmitSVEMaskedLoad(E, Ty, Ops, Builtin->LLVMIntrinsic, TypeFlags.isZExtReturn()); else if (TypeFlags.isStore()) return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isGatherLoad()) return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isScatterStore()) return EmitSVEScatterStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isPrefetch()) return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isGatherPrefetch()) return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isStructLoad()) return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isStructStore()) return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isTupleSet() || TypeFlags.isTupleGet()) return EmitSVETupleSetOrGet(TypeFlags, Ty, Ops); else if (TypeFlags.isTupleCreate()) return EmitSVETupleCreate(TypeFlags, Ty, Ops); else if (TypeFlags.isUndef()) return UndefValue::get(Ty); else if (Builtin->LLVMIntrinsic != 0) { if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp) InsertExplicitZeroOperand(Builder, Ty, Ops); if (TypeFlags.getMergeType() == SVETypeFlags::MergeAnyExp) InsertExplicitUndefOperand(Builder, Ty, 
Ops); // Some ACLE builtins leave out the argument to specify the predicate // pattern, which is expected to be expanded to an SV_ALL pattern. if (TypeFlags.isAppendSVALL()) Ops.push_back(Builder.getInt32(/*SV_ALL*/ 31)); if (TypeFlags.isInsertOp1SVALL()) Ops.insert(&Ops[1], Builder.getInt32(/*SV_ALL*/ 31)); // Predicates must match the main datatype. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (auto PredTy = dyn_cast(Ops[i]->getType())) if (PredTy->getElementType()->isIntegerTy(1)) Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags)); // Splat scalar operand to vector (intrinsics with _n infix) if (TypeFlags.hasSplatOperand()) { unsigned OpNo = TypeFlags.getSplatOperand(); Ops[OpNo] = EmitSVEDupX(Ops[OpNo]); } if (TypeFlags.isReverseCompare()) std::swap(Ops[1], Ops[2]); else if (TypeFlags.isReverseUSDOT()) std::swap(Ops[1], Ops[2]); else if (TypeFlags.isReverseMergeAnyBinOp() && TypeFlags.getMergeType() == SVETypeFlags::MergeAny) std::swap(Ops[1], Ops[2]); else if (TypeFlags.isReverseMergeAnyAccOp() && TypeFlags.getMergeType() == SVETypeFlags::MergeAny) std::swap(Ops[1], Ops[3]); // Predicated intrinsics with _z suffix need a select w/ zeroinitializer. if (TypeFlags.getMergeType() == SVETypeFlags::MergeZero) { llvm::Type *OpndTy = Ops[1]->getType(); auto *SplatZero = Constant::getNullValue(OpndTy); Ops[1] = Builder.CreateSelect(Ops[0], Ops[1], SplatZero); } Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, getSVEOverloadTypes(TypeFlags, Ty, Ops)); Value *Call = Builder.CreateCall(F, Ops); // Predicate results must be converted to svbool_t. if (auto PredTy = dyn_cast(Call->getType())) if (PredTy->getScalarType()->isIntegerTy(1)) Call = EmitSVEPredicateCast(Call, cast(Ty)); return FormSVEBuiltinResult(Call); } switch (BuiltinID) { default: return nullptr; case SVE::BI__builtin_sve_svreinterpret_b: { auto SVCountTy = llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); Function *CastFromSVCountF = CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy); return Builder.CreateCall(CastFromSVCountF, Ops[0]); } case SVE::BI__builtin_sve_svreinterpret_c: { auto SVCountTy = llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); Function *CastToSVCountF = CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy); return Builder.CreateCall(CastToSVCountF, Ops[0]); } case SVE::BI__builtin_sve_svpsel_lane_b8: case SVE::BI__builtin_sve_svpsel_lane_b16: case SVE::BI__builtin_sve_svpsel_lane_b32: case SVE::BI__builtin_sve_svpsel_lane_b64: case SVE::BI__builtin_sve_svpsel_lane_c8: case SVE::BI__builtin_sve_svpsel_lane_c16: case SVE::BI__builtin_sve_svpsel_lane_c32: case SVE::BI__builtin_sve_svpsel_lane_c64: { bool IsSVCount = isa(Ops[0]->getType()); assert(((!IsSVCount || cast(Ops[0]->getType())->getName() == "aarch64.svcount")) && "Unexpected TargetExtType"); auto SVCountTy = llvm::TargetExtType::get(getLLVMContext(), "aarch64.svcount"); Function *CastFromSVCountF = CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool, SVCountTy); Function *CastToSVCountF = CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, SVCountTy); auto OverloadedTy = getSVEType(SVETypeFlags(Builtin->TypeModifier)); Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_psel, OverloadedTy); llvm::Value *Ops0 = IsSVCount ? Builder.CreateCall(CastFromSVCountF, Ops[0]) : Ops[0]; llvm::Value *Ops1 = EmitSVEPredicateCast(Ops[1], OverloadedTy); llvm::Value *PSel = Builder.CreateCall(F, {Ops0, Ops1, Ops[2]}); return IsSVCount ? 
Builder.CreateCall(CastToSVCountF, PSel) : PSel; } case SVE::BI__builtin_sve_svmov_b_z: { // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op) SVETypeFlags TypeFlags(Builtin->TypeModifier); llvm::Type* OverloadedTy = getSVEType(TypeFlags); Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_and_z, OverloadedTy); return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[1]}); } case SVE::BI__builtin_sve_svnot_b_z: { // svnot_b_z(pg, op) <=> sveor_b_z(pg, op, pg) SVETypeFlags TypeFlags(Builtin->TypeModifier); llvm::Type* OverloadedTy = getSVEType(TypeFlags); Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_eor_z, OverloadedTy); return Builder.CreateCall(F, {Ops[0], Ops[1], Ops[0]}); } case SVE::BI__builtin_sve_svmovlb_u16: case SVE::BI__builtin_sve_svmovlb_u32: case SVE::BI__builtin_sve_svmovlb_u64: return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllb); case SVE::BI__builtin_sve_svmovlb_s16: case SVE::BI__builtin_sve_svmovlb_s32: case SVE::BI__builtin_sve_svmovlb_s64: return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllb); case SVE::BI__builtin_sve_svmovlt_u16: case SVE::BI__builtin_sve_svmovlt_u32: case SVE::BI__builtin_sve_svmovlt_u64: return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_ushllt); case SVE::BI__builtin_sve_svmovlt_s16: case SVE::BI__builtin_sve_svmovlt_s32: case SVE::BI__builtin_sve_svmovlt_s64: return EmitSVEMovl(TypeFlags, Ops, Intrinsic::aarch64_sve_sshllt); case SVE::BI__builtin_sve_svpmullt_u16: case SVE::BI__builtin_sve_svpmullt_u64: case SVE::BI__builtin_sve_svpmullt_n_u16: case SVE::BI__builtin_sve_svpmullt_n_u64: return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullt_pair); case SVE::BI__builtin_sve_svpmullb_u16: case SVE::BI__builtin_sve_svpmullb_u64: case SVE::BI__builtin_sve_svpmullb_n_u16: case SVE::BI__builtin_sve_svpmullb_n_u64: return EmitSVEPMull(TypeFlags, Ops, Intrinsic::aarch64_sve_pmullb_pair); case SVE::BI__builtin_sve_svdup_n_b8: case SVE::BI__builtin_sve_svdup_n_b16: case SVE::BI__builtin_sve_svdup_n_b32: case SVE::BI__builtin_sve_svdup_n_b64: { Value *CmpNE = Builder.CreateICmpNE(Ops[0], Constant::getNullValue(Ops[0]->getType())); llvm::ScalableVectorType *OverloadedTy = getSVEType(TypeFlags); Value *Dup = EmitSVEDupX(CmpNE, OverloadedTy); return EmitSVEPredicateCast(Dup, cast(Ty)); } case SVE::BI__builtin_sve_svdupq_n_b8: case SVE::BI__builtin_sve_svdupq_n_b16: case SVE::BI__builtin_sve_svdupq_n_b32: case SVE::BI__builtin_sve_svdupq_n_b64: case SVE::BI__builtin_sve_svdupq_n_u8: case SVE::BI__builtin_sve_svdupq_n_s8: case SVE::BI__builtin_sve_svdupq_n_u64: case SVE::BI__builtin_sve_svdupq_n_f64: case SVE::BI__builtin_sve_svdupq_n_s64: case SVE::BI__builtin_sve_svdupq_n_u16: case SVE::BI__builtin_sve_svdupq_n_f16: case SVE::BI__builtin_sve_svdupq_n_bf16: case SVE::BI__builtin_sve_svdupq_n_s16: case SVE::BI__builtin_sve_svdupq_n_u32: case SVE::BI__builtin_sve_svdupq_n_f32: case SVE::BI__builtin_sve_svdupq_n_s32: { // These builtins are implemented by storing each element to an array and using // ld1rq to materialize a vector. unsigned NumOpnds = Ops.size(); bool IsBoolTy = cast(Ty)->getElementType()->isIntegerTy(1); // For svdupq_n_b* the element type of is an integer of type 128/numelts, // so that the compare can use the width that is natural for the expected // number of predicate lanes. 
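// A standalone sketch of the width computation described above for the
// boolean svdupq_n_b* forms: with NumOpnds operands, each packed element is
// 128 / NumOpnds bits wide. The helper name is illustrative only.
constexpr unsigned dupqBoolEltBits(unsigned NumOpnds) {
  return 128u / NumOpnds;
}
static_assert(dupqBoolEltBits(16) == 8, "svdupq_n_b8 packs to i8 lanes");
static_assert(dupqBoolEltBits(2) == 64, "svdupq_n_b64 packs to i64 lanes");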
llvm::Type *EltTy = Ops[0]->getType(); if (IsBoolTy) EltTy = IntegerType::get(getLLVMContext(), SVEBitsPerBlock / NumOpnds); SmallVector VecOps; for (unsigned I = 0; I < NumOpnds; ++I) VecOps.push_back(Builder.CreateZExt(Ops[I], EltTy)); Value *Vec = BuildVector(VecOps); llvm::Type *OverloadedTy = getSVEVectorForElementType(EltTy); Value *InsertSubVec = Builder.CreateInsertVector( OverloadedTy, PoisonValue::get(OverloadedTy), Vec, Builder.getInt64(0)); Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_dupq_lane, OverloadedTy); Value *DupQLane = Builder.CreateCall(F, {InsertSubVec, Builder.getInt64(0)}); if (!IsBoolTy) return DupQLane; SVETypeFlags TypeFlags(Builtin->TypeModifier); Value *Pred = EmitSVEAllTruePred(TypeFlags); // For svdupq_n_b* we need to add an additional 'cmpne' with '0'. F = CGM.getIntrinsic(NumOpnds == 2 ? Intrinsic::aarch64_sve_cmpne : Intrinsic::aarch64_sve_cmpne_wide, OverloadedTy); Value *Call = Builder.CreateCall( F, {Pred, DupQLane, EmitSVEDupX(Builder.getInt64(0))}); return EmitSVEPredicateCast(Call, cast(Ty)); } case SVE::BI__builtin_sve_svpfalse_b: return ConstantInt::getFalse(Ty); case SVE::BI__builtin_sve_svpfalse_c: { auto SVBoolTy = ScalableVectorType::get(Builder.getInt1Ty(), 16); Function *CastToSVCountF = CGM.getIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool, Ty); return Builder.CreateCall(CastToSVCountF, ConstantInt::getFalse(SVBoolTy)); } case SVE::BI__builtin_sve_svlen_bf16: case SVE::BI__builtin_sve_svlen_f16: case SVE::BI__builtin_sve_svlen_f32: case SVE::BI__builtin_sve_svlen_f64: case SVE::BI__builtin_sve_svlen_s8: case SVE::BI__builtin_sve_svlen_s16: case SVE::BI__builtin_sve_svlen_s32: case SVE::BI__builtin_sve_svlen_s64: case SVE::BI__builtin_sve_svlen_u8: case SVE::BI__builtin_sve_svlen_u16: case SVE::BI__builtin_sve_svlen_u32: case SVE::BI__builtin_sve_svlen_u64: { SVETypeFlags TF(Builtin->TypeModifier); auto VTy = cast(getSVEType(TF)); auto *NumEls = llvm::ConstantInt::get(Ty, VTy->getElementCount().getKnownMinValue()); Function *F = CGM.getIntrinsic(Intrinsic::vscale, Ty); return Builder.CreateMul(NumEls, Builder.CreateCall(F)); } case SVE::BI__builtin_sve_svtbl2_u8: case SVE::BI__builtin_sve_svtbl2_s8: case SVE::BI__builtin_sve_svtbl2_u16: case SVE::BI__builtin_sve_svtbl2_s16: case SVE::BI__builtin_sve_svtbl2_u32: case SVE::BI__builtin_sve_svtbl2_s32: case SVE::BI__builtin_sve_svtbl2_u64: case SVE::BI__builtin_sve_svtbl2_s64: case SVE::BI__builtin_sve_svtbl2_f16: case SVE::BI__builtin_sve_svtbl2_bf16: case SVE::BI__builtin_sve_svtbl2_f32: case SVE::BI__builtin_sve_svtbl2_f64: { SVETypeFlags TF(Builtin->TypeModifier); auto VTy = cast(getSVEType(TF)); Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sve_tbl2, VTy); return Builder.CreateCall(F, Ops); } case SVE::BI__builtin_sve_svset_neonq_s8: case SVE::BI__builtin_sve_svset_neonq_s16: case SVE::BI__builtin_sve_svset_neonq_s32: case SVE::BI__builtin_sve_svset_neonq_s64: case SVE::BI__builtin_sve_svset_neonq_u8: case SVE::BI__builtin_sve_svset_neonq_u16: case SVE::BI__builtin_sve_svset_neonq_u32: case SVE::BI__builtin_sve_svset_neonq_u64: case SVE::BI__builtin_sve_svset_neonq_f16: case SVE::BI__builtin_sve_svset_neonq_f32: case SVE::BI__builtin_sve_svset_neonq_f64: case SVE::BI__builtin_sve_svset_neonq_bf16: { return Builder.CreateInsertVector(Ty, Ops[0], Ops[1], Builder.getInt64(0)); } case SVE::BI__builtin_sve_svget_neonq_s8: case SVE::BI__builtin_sve_svget_neonq_s16: case SVE::BI__builtin_sve_svget_neonq_s32: case SVE::BI__builtin_sve_svget_neonq_s64: case 
SVE::BI__builtin_sve_svget_neonq_u8: case SVE::BI__builtin_sve_svget_neonq_u16: case SVE::BI__builtin_sve_svget_neonq_u32: case SVE::BI__builtin_sve_svget_neonq_u64: case SVE::BI__builtin_sve_svget_neonq_f16: case SVE::BI__builtin_sve_svget_neonq_f32: case SVE::BI__builtin_sve_svget_neonq_f64: case SVE::BI__builtin_sve_svget_neonq_bf16: { return Builder.CreateExtractVector(Ty, Ops[0], Builder.getInt64(0)); } case SVE::BI__builtin_sve_svdup_neonq_s8: case SVE::BI__builtin_sve_svdup_neonq_s16: case SVE::BI__builtin_sve_svdup_neonq_s32: case SVE::BI__builtin_sve_svdup_neonq_s64: case SVE::BI__builtin_sve_svdup_neonq_u8: case SVE::BI__builtin_sve_svdup_neonq_u16: case SVE::BI__builtin_sve_svdup_neonq_u32: case SVE::BI__builtin_sve_svdup_neonq_u64: case SVE::BI__builtin_sve_svdup_neonq_f16: case SVE::BI__builtin_sve_svdup_neonq_f32: case SVE::BI__builtin_sve_svdup_neonq_f64: case SVE::BI__builtin_sve_svdup_neonq_bf16: { Value *Insert = Builder.CreateInsertVector(Ty, PoisonValue::get(Ty), Ops[0], Builder.getInt64(0)); return Builder.CreateIntrinsic(Intrinsic::aarch64_sve_dupq_lane, {Ty}, {Insert, Builder.getInt64(0)}); } } /// Should not happen return nullptr; } static void swapCommutativeSMEOperands(unsigned BuiltinID, SmallVectorImpl &Ops) { unsigned MultiVec; switch (BuiltinID) { default: return; case SME::BI__builtin_sme_svsumla_za32_s8_vg4x1: MultiVec = 1; break; case SME::BI__builtin_sme_svsumla_za32_s8_vg4x2: case SME::BI__builtin_sme_svsudot_za32_s8_vg1x2: MultiVec = 2; break; case SME::BI__builtin_sme_svsudot_za32_s8_vg1x4: case SME::BI__builtin_sme_svsumla_za32_s8_vg4x4: MultiVec = 4; break; } if (MultiVec > 0) for (unsigned I = 0; I < MultiVec; ++I) std::swap(Ops[I + 1], Ops[I + 1 + MultiVec]); } Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID, AArch64SMEIntrinsicsProvenSorted); llvm::SmallVector Ops; SVETypeFlags TypeFlags(Builtin->TypeModifier); GetAArch64SVEProcessedOperands(BuiltinID, E, Ops, TypeFlags); if (TypeFlags.isLoad() || TypeFlags.isStore()) return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isReadZA() || TypeFlags.isWriteZA()) return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (BuiltinID == SME::BI__builtin_sme_svzero_mask_za || BuiltinID == SME::BI__builtin_sme_svzero_za) return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (BuiltinID == SME::BI__builtin_sme_svldr_vnum_za || BuiltinID == SME::BI__builtin_sme_svstr_vnum_za || BuiltinID == SME::BI__builtin_sme_svldr_za || BuiltinID == SME::BI__builtin_sme_svstr_za) return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic); // Handle builtins which require their multi-vector operands to be swapped swapCommutativeSMEOperands(BuiltinID, Ops); // Should not happen! if (Builtin->LLVMIntrinsic == 0) return nullptr; // Predicates must match the main datatype. for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (auto PredTy = dyn_cast(Ops[i]->getType())) if (PredTy->getElementType()->isIntegerTy(1)) Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags)); Function *F = TypeFlags.isOverloadNone() ? 
CGM.getIntrinsic(Builtin->LLVMIntrinsic) : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)}); Value *Call = Builder.CreateCall(F, Ops); return FormSVEBuiltinResult(Call); } Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch) { if (BuiltinID >= clang::AArch64::FirstSVEBuiltin && BuiltinID <= clang::AArch64::LastSVEBuiltin) return EmitAArch64SVEBuiltinExpr(BuiltinID, E); if (BuiltinID >= clang::AArch64::FirstSMEBuiltin && BuiltinID <= clang::AArch64::LastSMEBuiltin) return EmitAArch64SMEBuiltinExpr(BuiltinID, E); if (BuiltinID == Builtin::BI__builtin_cpu_supports) return EmitAArch64CpuSupports(E); unsigned HintID = static_cast(-1); switch (BuiltinID) { default: break; case clang::AArch64::BI__builtin_arm_nop: HintID = 0; break; case clang::AArch64::BI__builtin_arm_yield: case clang::AArch64::BI__yield: HintID = 1; break; case clang::AArch64::BI__builtin_arm_wfe: case clang::AArch64::BI__wfe: HintID = 2; break; case clang::AArch64::BI__builtin_arm_wfi: case clang::AArch64::BI__wfi: HintID = 3; break; case clang::AArch64::BI__builtin_arm_sev: case clang::AArch64::BI__sev: HintID = 4; break; case clang::AArch64::BI__builtin_arm_sevl: case clang::AArch64::BI__sevl: HintID = 5; break; } if (HintID != static_cast(-1)) { Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); } if (BuiltinID == clang::AArch64::BI__builtin_arm_trap) { Function *F = CGM.getIntrinsic(Intrinsic::aarch64_break); llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(F, Builder.CreateZExt(Arg, CGM.Int32Ty)); } if (BuiltinID == clang::AArch64::BI__builtin_arm_get_sme_state) { // Create call to __arm_sme_state and store the results to the two pointers. 
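// A standalone sketch of the plumbing below, with an illustrative stand-in
// for the runtime routine: the call yields two 64-bit words and each word is
// stored through one of the caller-provided pointers. Names are assumptions,
// not part of this file.
struct SMEStatePair { unsigned long long X0, X1; };
inline void storeSMEState(const SMEStatePair &State, unsigned long long *Out0,
                          unsigned long long *Out1) {
  *Out0 = State.X0; // first result word
  *Out1 = State.X1; // second result word
}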
CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction( llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {}, false), "__arm_sme_state")); auto Attrs = AttributeList().addFnAttribute(getLLVMContext(), "aarch64_pstate_sm_compatible"); CI->setAttributes(Attrs); CI->setCallingConv( llvm::CallingConv:: AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2); Builder.CreateStore(Builder.CreateExtractValue(CI, 0), EmitPointerWithAlignment(E->getArg(0))); return Builder.CreateStore(Builder.CreateExtractValue(CI, 1), EmitPointerWithAlignment(E->getArg(1))); } if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit) { assert((getContext().getTypeSize(E->getType()) == 32) && "rbit of unusual size!"); llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } if (BuiltinID == clang::AArch64::BI__builtin_arm_rbit64) { assert((getContext().getTypeSize(E->getType()) == 64) && "rbit of unusual size!"); llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit"); } if (BuiltinID == clang::AArch64::BI__builtin_arm_clz || BuiltinID == clang::AArch64::BI__builtin_arm_clz64) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Arg->getType()); Value *Res = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); if (BuiltinID == clang::AArch64::BI__builtin_arm_clz64) Res = Builder.CreateTrunc(Res, Builder.getInt32Ty()); return Res; } if (BuiltinID == clang::AArch64::BI__builtin_arm_cls) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls), Arg, "cls"); } if (BuiltinID == clang::AArch64::BI__builtin_arm_cls64) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_cls64), Arg, "cls"); } if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32zf || BuiltinID == clang::AArch64::BI__builtin_arm_rint32z) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32z, Ty), Arg, "frint32z"); } if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64zf || BuiltinID == clang::AArch64::BI__builtin_arm_rint64z) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64z, Ty), Arg, "frint64z"); } if (BuiltinID == clang::AArch64::BI__builtin_arm_rint32xf || BuiltinID == clang::AArch64::BI__builtin_arm_rint32x) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint32x, Ty), Arg, "frint32x"); } if (BuiltinID == clang::AArch64::BI__builtin_arm_rint64xf || BuiltinID == clang::AArch64::BI__builtin_arm_rint64x) { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *Ty = Arg->getType(); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_frint64x, Ty), Arg, "frint64x"); } if (BuiltinID == clang::AArch64::BI__builtin_arm_jcvt) { assert((getContext().getTypeSize(E->getType()) == 32) && "__jcvt of unusual size!"); llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg); } if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b || BuiltinID == clang::AArch64::BI__builtin_arm_st64b || BuiltinID == 
clang::AArch64::BI__builtin_arm_st64bv || BuiltinID == clang::AArch64::BI__builtin_arm_st64bv0) { llvm::Value *MemAddr = EmitScalarExpr(E->getArg(0)); llvm::Value *ValPtr = EmitScalarExpr(E->getArg(1)); if (BuiltinID == clang::AArch64::BI__builtin_arm_ld64b) { // Load from the address via an LLVM intrinsic, receiving a // tuple of 8 i64 words, and store each one to ValPtr. Function *F = CGM.getIntrinsic(Intrinsic::aarch64_ld64b); llvm::Value *Val = Builder.CreateCall(F, MemAddr); llvm::Value *ToRet; for (size_t i = 0; i < 8; i++) { llvm::Value *ValOffsetPtr = Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i)); Address Addr = Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8)); ToRet = Builder.CreateStore(Builder.CreateExtractValue(Val, i), Addr); } return ToRet; } else { // Load 8 i64 words from ValPtr, and store them to the address // via an LLVM intrinsic. SmallVector Args; Args.push_back(MemAddr); for (size_t i = 0; i < 8; i++) { llvm::Value *ValOffsetPtr = Builder.CreateGEP(Int64Ty, ValPtr, Builder.getInt32(i)); Address Addr = Address(ValOffsetPtr, Int64Ty, CharUnits::fromQuantity(8)); Args.push_back(Builder.CreateLoad(Addr)); } auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_st64b ? Intrinsic::aarch64_st64b : BuiltinID == clang::AArch64::BI__builtin_arm_st64bv ? Intrinsic::aarch64_st64bv : Intrinsic::aarch64_st64bv0); Function *F = CGM.getIntrinsic(Intr); return Builder.CreateCall(F, Args); } } if (BuiltinID == clang::AArch64::BI__builtin_arm_rndr || BuiltinID == clang::AArch64::BI__builtin_arm_rndrrs) { auto Intr = (BuiltinID == clang::AArch64::BI__builtin_arm_rndr ? Intrinsic::aarch64_rndr : Intrinsic::aarch64_rndrrs); Function *F = CGM.getIntrinsic(Intr); llvm::Value *Val = Builder.CreateCall(F); Value *RandomValue = Builder.CreateExtractValue(Val, 0); Value *Status = Builder.CreateExtractValue(Val, 1); Address MemAddress = EmitPointerWithAlignment(E->getArg(0)); Builder.CreateStore(RandomValue, MemAddress); Status = Builder.CreateZExt(Status, Int32Ty); return Status; } if (BuiltinID == clang::AArch64::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); Value *Ops[2]; for (unsigned i = 0; i < 2; i++) Ops[i] = EmitScalarExpr(E->getArg(i)); llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); llvm::FunctionType *FTy = cast(Ty); StringRef Name = FD->getName(); return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); } if ((BuiltinID == clang::AArch64::BI__builtin_arm_ldrex || BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) && getContext().getTypeSize(E->getType()) == 128) { Function *F = CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex ? 
Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp); Value *LdPtr = EmitScalarExpr(E->getArg(0)); Value *Val = Builder.CreateCall(F, LdPtr, "ldxp"); Value *Val0 = Builder.CreateExtractValue(Val, 1); Value *Val1 = Builder.CreateExtractValue(Val, 0); llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); Val0 = Builder.CreateZExt(Val0, Int128Ty); Val1 = Builder.CreateZExt(Val1, Int128Ty); Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); Val = Builder.CreateOr(Val, Val1); return Builder.CreateBitCast(Val, ConvertType(E->getType())); } else if (BuiltinID == clang::AArch64::BI__builtin_arm_ldrex || BuiltinID == clang::AArch64::BI__builtin_arm_ldaex) { Value *LoadAddr = EmitScalarExpr(E->getArg(0)); QualType Ty = E->getType(); llvm::Type *RealResTy = ConvertType(Ty); llvm::Type *IntTy = llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); Function *F = CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_ldaex ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr, UnqualPtrTy); CallInst *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); Val->addParamAttr( 0, Attribute::get(getLLVMContext(), Attribute::ElementType, IntTy)); if (RealResTy->isPointerTy()) return Builder.CreateIntToPtr(Val, RealResTy); llvm::Type *IntResTy = llvm::IntegerType::get( getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy)); return Builder.CreateBitCast(Builder.CreateTruncOrBitCast(Val, IntResTy), RealResTy); } if ((BuiltinID == clang::AArch64::BI__builtin_arm_strex || BuiltinID == clang::AArch64::BI__builtin_arm_stlex) && getContext().getTypeSize(E->getArg(0)->getType()) == 128) { Function *F = CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp); llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty); Address Tmp = CreateMemTemp(E->getArg(0)->getType()); EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); Tmp = Tmp.withElementType(STy); llvm::Value *Val = Builder.CreateLoad(Tmp); Value *Arg0 = Builder.CreateExtractValue(Val, 0); Value *Arg1 = Builder.CreateExtractValue(Val, 1); Value *StPtr = EmitScalarExpr(E->getArg(1)); return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); } if (BuiltinID == clang::AArch64::BI__builtin_arm_strex || BuiltinID == clang::AArch64::BI__builtin_arm_stlex) { Value *StoreVal = EmitScalarExpr(E->getArg(0)); Value *StoreAddr = EmitScalarExpr(E->getArg(1)); QualType Ty = E->getArg(0)->getType(); llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(Ty)); if (StoreVal->getType()->isPointerTy()) StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); else { llvm::Type *IntTy = llvm::IntegerType::get( getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType())); StoreVal = Builder.CreateBitCast(StoreVal, IntTy); StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); } Function *F = CGM.getIntrinsic(BuiltinID == clang::AArch64::BI__builtin_arm_stlex ? 
Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr, StoreAddr->getType()); CallInst *CI = Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); CI->addParamAttr( 1, Attribute::get(getLLVMContext(), Attribute::ElementType, StoreTy)); return CI; } if (BuiltinID == clang::AArch64::BI__getReg) { Expr::EvalResult Result; if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) llvm_unreachable("Sema will ensure that the parameter is constant"); llvm::APSInt Value = Result.Val.getInt(); LLVMContext &Context = CGM.getLLVMContext(); std::string Reg = Value == 31 ? "sp" : "x" + toString(Value, 10); llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)}; llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); return Builder.CreateCall(F, Metadata); } if (BuiltinID == clang::AArch64::BI__break) { Expr::EvalResult Result; if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext())) llvm_unreachable("Sema will ensure that the parameter is constant"); llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::aarch64_break); return Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))}); } if (BuiltinID == clang::AArch64::BI__builtin_arm_clrex) { Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); return Builder.CreateCall(F); } if (BuiltinID == clang::AArch64::BI_ReadWriteBarrier) return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, llvm::SyncScope::SingleThread); // CRC32 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { case clang::AArch64::BI__builtin_arm_crc32b: CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; case clang::AArch64::BI__builtin_arm_crc32cb: CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; case clang::AArch64::BI__builtin_arm_crc32h: CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; case clang::AArch64::BI__builtin_arm_crc32ch: CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; case clang::AArch64::BI__builtin_arm_crc32w: CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; case clang::AArch64::BI__builtin_arm_crc32cw: CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; case clang::AArch64::BI__builtin_arm_crc32d: CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; case clang::AArch64::BI__builtin_arm_crc32cd: CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; } if (CRCIntrinsicID != Intrinsic::not_intrinsic) { Value *Arg0 = EmitScalarExpr(E->getArg(0)); Value *Arg1 = EmitScalarExpr(E->getArg(1)); Function *F = CGM.getIntrinsic(CRCIntrinsicID); llvm::Type *DataTy = F->getFunctionType()->getParamType(1); Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); return Builder.CreateCall(F, {Arg0, Arg1}); } // Memory Operations (MOPS) if (BuiltinID == AArch64::BI__builtin_arm_mops_memset_tag) { Value *Dst = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); Value *Size = EmitScalarExpr(E->getArg(2)); Dst = Builder.CreatePointerCast(Dst, Int8PtrTy); Val = Builder.CreateTrunc(Val, Int8Ty); Size = Builder.CreateIntCast(Size, Int64Ty, false); return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::aarch64_mops_memset_tag), {Dst, Val, Size}); } // Memory Tagging Extensions (MTE) Intrinsics Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic; switch (BuiltinID) { case clang::AArch64::BI__builtin_arm_irg: MTEIntrinsicID = Intrinsic::aarch64_irg; break; case clang::AArch64::BI__builtin_arm_addg: MTEIntrinsicID = Intrinsic::aarch64_addg; break; case 
clang::AArch64::BI__builtin_arm_gmi: MTEIntrinsicID = Intrinsic::aarch64_gmi; break; case clang::AArch64::BI__builtin_arm_ldg: MTEIntrinsicID = Intrinsic::aarch64_ldg; break; case clang::AArch64::BI__builtin_arm_stg: MTEIntrinsicID = Intrinsic::aarch64_stg; break; case clang::AArch64::BI__builtin_arm_subp: MTEIntrinsicID = Intrinsic::aarch64_subp; break; } if (MTEIntrinsicID != Intrinsic::not_intrinsic) { llvm::Type *T = ConvertType(E->getType()); if (MTEIntrinsicID == Intrinsic::aarch64_irg) { Value *Pointer = EmitScalarExpr(E->getArg(0)); Value *Mask = EmitScalarExpr(E->getArg(1)); Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); Mask = Builder.CreateZExt(Mask, Int64Ty); Value *RV = Builder.CreateCall( CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask}); return Builder.CreatePointerCast(RV, T); } if (MTEIntrinsicID == Intrinsic::aarch64_addg) { Value *Pointer = EmitScalarExpr(E->getArg(0)); Value *TagOffset = EmitScalarExpr(E->getArg(1)); Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); TagOffset = Builder.CreateZExt(TagOffset, Int64Ty); Value *RV = Builder.CreateCall( CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset}); return Builder.CreatePointerCast(RV, T); } if (MTEIntrinsicID == Intrinsic::aarch64_gmi) { Value *Pointer = EmitScalarExpr(E->getArg(0)); Value *ExcludedMask = EmitScalarExpr(E->getArg(1)); ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty); Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy); return Builder.CreateCall( CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask}); } // Although it is possible to supply a different return // address (first arg) to this intrinsic, for now we set // return address same as input address. if (MTEIntrinsicID == Intrinsic::aarch64_ldg) { Value *TagAddress = EmitScalarExpr(E->getArg(0)); TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); Value *RV = Builder.CreateCall( CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); return Builder.CreatePointerCast(RV, T); } // Although it is possible to supply a different tag (to set) // to this intrinsic (as first arg), for now we supply // the tag that is in input address arg (common use case). 
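// A standalone sketch of where the logical address tag used by these MTE
// builtins lives, assuming the usual AArch64 layout with the allocation tag
// in bits 59:56 of the pointer; the helper is illustrative only.
inline unsigned logicalTagOf(unsigned long long Addr) {
  return (unsigned)((Addr >> 56) & 0xF); // 4-bit tag from the top byte
}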
if (MTEIntrinsicID == Intrinsic::aarch64_stg) { Value *TagAddress = EmitScalarExpr(E->getArg(0)); TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy); return Builder.CreateCall( CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress}); } if (MTEIntrinsicID == Intrinsic::aarch64_subp) { Value *PointerA = EmitScalarExpr(E->getArg(0)); Value *PointerB = EmitScalarExpr(E->getArg(1)); PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy); PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy); return Builder.CreateCall( CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB}); } } if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 || BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || BuiltinID == clang::AArch64::BI__builtin_arm_rsrp || BuiltinID == clang::AArch64::BI__builtin_arm_wsr || BuiltinID == clang::AArch64::BI__builtin_arm_wsr64 || BuiltinID == clang::AArch64::BI__builtin_arm_wsr128 || BuiltinID == clang::AArch64::BI__builtin_arm_wsrp) { SpecialRegisterAccessKind AccessKind = Write; if (BuiltinID == clang::AArch64::BI__builtin_arm_rsr || BuiltinID == clang::AArch64::BI__builtin_arm_rsr64 || BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || BuiltinID == clang::AArch64::BI__builtin_arm_rsrp) AccessKind = VolatileRead; bool IsPointerBuiltin = BuiltinID == clang::AArch64::BI__builtin_arm_rsrp || BuiltinID == clang::AArch64::BI__builtin_arm_wsrp; bool Is32Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr || BuiltinID == clang::AArch64::BI__builtin_arm_wsr; bool Is128Bit = BuiltinID == clang::AArch64::BI__builtin_arm_rsr128 || BuiltinID == clang::AArch64::BI__builtin_arm_wsr128; llvm::Type *ValueType; llvm::Type *RegisterType = Int64Ty; if (Is32Bit) { ValueType = Int32Ty; } else if (Is128Bit) { llvm::Type *Int128Ty = llvm::IntegerType::getInt128Ty(CGM.getLLVMContext()); ValueType = Int128Ty; RegisterType = Int128Ty; } else if (IsPointerBuiltin) { ValueType = VoidPtrTy; } else { ValueType = Int64Ty; }; return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, AccessKind); } if (BuiltinID == clang::AArch64::BI_ReadStatusReg || BuiltinID == clang::AArch64::BI_WriteStatusReg) { LLVMContext &Context = CGM.getLLVMContext(); unsigned SysReg = E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue(); std::string SysRegStr; llvm::raw_string_ostream(SysRegStr) << ((1 << 1) | ((SysReg >> 14) & 1)) << ":" << ((SysReg >> 11) & 7) << ":" << ((SysReg >> 7) & 15) << ":" << ((SysReg >> 3) & 15) << ":" << ( SysReg & 7); llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) }; llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); llvm::Type *RegisterType = Int64Ty; llvm::Type *Types[] = { RegisterType }; if (BuiltinID == clang::AArch64::BI_ReadStatusReg) { llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); return Builder.CreateCall(F, Metadata); } llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1)); return Builder.CreateCall(F, { Metadata, ArgValue }); } if (BuiltinID == clang::AArch64::BI_AddressOfReturnAddress) { llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); return Builder.CreateCall(F); } if (BuiltinID == clang::AArch64::BI__builtin_sponentry) { llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry, AllocaInt8PtrTy); return Builder.CreateCall(F); } if (BuiltinID == 
clang::AArch64::BI__mulh || BuiltinID == clang::AArch64::BI__umulh) { llvm::Type *ResType = ConvertType(E->getType()); llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); bool IsSigned = BuiltinID == clang::AArch64::BI__mulh; Value *LHS = Builder.CreateIntCast(EmitScalarExpr(E->getArg(0)), Int128Ty, IsSigned); Value *RHS = Builder.CreateIntCast(EmitScalarExpr(E->getArg(1)), Int128Ty, IsSigned); Value *MulResult, *HigherBits; if (IsSigned) { MulResult = Builder.CreateNSWMul(LHS, RHS); HigherBits = Builder.CreateAShr(MulResult, 64); } else { MulResult = Builder.CreateNUWMul(LHS, RHS); HigherBits = Builder.CreateLShr(MulResult, 64); } HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned); return HigherBits; } if (BuiltinID == AArch64::BI__writex18byte || BuiltinID == AArch64::BI__writex18word || BuiltinID == AArch64::BI__writex18dword || BuiltinID == AArch64::BI__writex18qword) { // Read x18 as i8* LLVMContext &Context = CGM.getLLVMContext(); llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")}; llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); llvm::Value *X18 = Builder.CreateCall(F, Metadata); X18 = Builder.CreateIntToPtr(X18, Int8PtrTy); // Store val at x18 + offset Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); Value *Val = EmitScalarExpr(E->getArg(1)); StoreInst *Store = Builder.CreateAlignedStore(Val, Ptr, CharUnits::One()); return Store; } if (BuiltinID == AArch64::BI__readx18byte || BuiltinID == AArch64::BI__readx18word || BuiltinID == AArch64::BI__readx18dword || BuiltinID == AArch64::BI__readx18qword) { llvm::Type *IntTy = ConvertType(E->getType()); // Read x18 as i8* LLVMContext &Context = CGM.getLLVMContext(); llvm::Metadata *Ops[] = {llvm::MDString::get(Context, "x18")}; llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty}); llvm::Value *X18 = Builder.CreateCall(F, Metadata); X18 = Builder.CreateIntToPtr(X18, Int8PtrTy); // Load x18 + offset Value *Offset = Builder.CreateZExt(EmitScalarExpr(E->getArg(0)), Int64Ty); Value *Ptr = Builder.CreateGEP(Int8Ty, X18, Offset); LoadInst *Load = Builder.CreateAlignedLoad(IntTy, Ptr, CharUnits::One()); return Load; } if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 || BuiltinID == AArch64::BI_CopyFloatFromInt32 || BuiltinID == AArch64::BI_CopyInt32FromFloat || BuiltinID == AArch64::BI_CopyInt64FromDouble) { Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *RetTy = ConvertType(E->getType()); return Builder.CreateBitCast(Arg, RetTy); } if (BuiltinID == AArch64::BI_CountLeadingOnes || BuiltinID == AArch64::BI_CountLeadingOnes64 || BuiltinID == AArch64::BI_CountLeadingZeros || BuiltinID == AArch64::BI_CountLeadingZeros64) { Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = Arg->getType(); if (BuiltinID == AArch64::BI_CountLeadingOnes || BuiltinID == AArch64::BI_CountLeadingOnes64) Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType)); Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); if (BuiltinID == AArch64::BI_CountLeadingOnes64 || BuiltinID == AArch64::BI_CountLeadingZeros64) Result = Builder.CreateTrunc(Result, 
Builder.getInt32Ty()); return Result; } if (BuiltinID == AArch64::BI_CountLeadingSigns || BuiltinID == AArch64::BI_CountLeadingSigns64) { Value *Arg = EmitScalarExpr(E->getArg(0)); Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns) ? CGM.getIntrinsic(Intrinsic::aarch64_cls) : CGM.getIntrinsic(Intrinsic::aarch64_cls64); Value *Result = Builder.CreateCall(F, Arg, "cls"); if (BuiltinID == AArch64::BI_CountLeadingSigns64) Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); return Result; } if (BuiltinID == AArch64::BI_CountOneBits || BuiltinID == AArch64::BI_CountOneBits64) { Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); Value *Result = Builder.CreateCall(F, ArgValue); if (BuiltinID == AArch64::BI_CountOneBits64) Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); return Result; } if (BuiltinID == AArch64::BI__prefetch) { Value *Address = EmitScalarExpr(E->getArg(0)); Value *RW = llvm::ConstantInt::get(Int32Ty, 0); Value *Locality = ConstantInt::get(Int32Ty, 3); Value *Data = llvm::ConstantInt::get(Int32Ty, 1); Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); return Builder.CreateCall(F, {Address, RW, Locality, Data}); } if (BuiltinID == AArch64::BI__hlt) { Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hlt); Builder.CreateCall(F, {EmitScalarExpr(E->getArg(0))}); // Return 0 for convenience, even though MSVC returns some other undefined // value. return ConstantInt::get(Builder.getInt32Ty(), 0); } // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. if (std::optional MsvcIntId = translateAarch64ToMsvcIntrin(BuiltinID)) return EmitMSVCBuiltinExpr(*MsvcIntId, E); // Some intrinsics are equivalent - if they are use the base intrinsic ID. auto It = llvm::find_if(NEONEquivalentIntrinsicMap, [BuiltinID](auto &P) { return P.first == BuiltinID; }); if (It != end(NEONEquivalentIntrinsicMap)) BuiltinID = It->second; // Find out if any arguments are required to be integer constant // expressions. unsigned ICEArguments = 0; ASTContext::GetBuiltinTypeError Error; getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); llvm::SmallVector Ops; Address PtrOp0 = Address::invalid(); for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { if (i == 0) { switch (BuiltinID) { case NEON::BI__builtin_neon_vld1_v: case NEON::BI__builtin_neon_vld1q_v: case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: case NEON::BI__builtin_neon_vld1_lane_v: case NEON::BI__builtin_neon_vld1q_lane_v: case NEON::BI__builtin_neon_vst1_v: case NEON::BI__builtin_neon_vst1q_v: case NEON::BI__builtin_neon_vst1_lane_v: case NEON::BI__builtin_neon_vst1q_lane_v: case NEON::BI__builtin_neon_vldap1_lane_s64: case NEON::BI__builtin_neon_vldap1q_lane_s64: case NEON::BI__builtin_neon_vstl1_lane_s64: case NEON::BI__builtin_neon_vstl1q_lane_s64: // Get the alignment for the argument in addition to the value; // we'll use it later. 
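      // For example (illustrative, not in the original): a call such as
      // vld1q_lane_s32(ptr, vec, 1) takes this path, so the pointer operand is
      // emitted via EmitPointerWithAlignment and the known alignment of *ptr
      // is still available when the load itself is built further down.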
PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); Ops.push_back(PtrOp0.emitRawPointer(*this)); continue; } } Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } auto SISDMap = ArrayRef(AArch64SISDIntrinsicMap); const ARMVectorIntrinsicInfo *Builtin = findARMVectorIntrinsicInMap( SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); if (Builtin) { Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); assert(Result && "SISD intrinsic should have been handled"); return Result; } const Expr *Arg = E->getArg(E->getNumArgs()-1); NeonTypeFlags Type(0); if (std::optional Result = Arg->getIntegerConstantExpr(getContext())) // Determine the type of this overloaded NEON intrinsic. Type = NeonTypeFlags(Result->getZExtValue()); bool usgn = Type.isUnsigned(); bool quad = Type.isQuad(); // Handle non-overloaded intrinsics first. switch (BuiltinID) { default: break; case NEON::BI__builtin_neon_vabsh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs"); case NEON::BI__builtin_neon_vaddq_p128: { llvm::Type *Ty = GetNeonType(this, NeonTypeFlags::Poly128); Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[0] = Builder.CreateXor(Ops[0], Ops[1]); llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); return Builder.CreateBitCast(Ops[0], Int128Ty); } case NEON::BI__builtin_neon_vldrq_p128: { llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128); Value *Ptr = EmitScalarExpr(E->getArg(0)); return Builder.CreateAlignedLoad(Int128Ty, Ptr, CharUnits::fromQuantity(16)); } case NEON::BI__builtin_neon_vstrq_p128: { Value *Ptr = Ops[0]; return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); } case NEON::BI__builtin_neon_vcvts_f32_u32: case NEON::BI__builtin_neon_vcvtd_f64_u64: usgn = true; [[fallthrough]]; case NEON::BI__builtin_neon_vcvts_f32_s32: case NEON::BI__builtin_neon_vcvtd_f64_s64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; Ops[0] = Builder.CreateBitCast(Ops[0], InTy); if (usgn) return Builder.CreateUIToFP(Ops[0], FTy); return Builder.CreateSIToFP(Ops[0], FTy); } case NEON::BI__builtin_neon_vcvth_f16_u16: case NEON::BI__builtin_neon_vcvth_f16_u32: case NEON::BI__builtin_neon_vcvth_f16_u64: usgn = true; [[fallthrough]]; case NEON::BI__builtin_neon_vcvth_f16_s16: case NEON::BI__builtin_neon_vcvth_f16_s32: case NEON::BI__builtin_neon_vcvth_f16_s64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); llvm::Type *FTy = HalfTy; llvm::Type *InTy; if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64) InTy = Int64Ty; else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32) InTy = Int32Ty; else InTy = Int16Ty; Ops[0] = Builder.CreateBitCast(Ops[0], InTy); if (usgn) return Builder.CreateUIToFP(Ops[0], FTy); return Builder.CreateSIToFP(Ops[0], FTy); } case NEON::BI__builtin_neon_vcvtah_u16_f16: case NEON::BI__builtin_neon_vcvtmh_u16_f16: case NEON::BI__builtin_neon_vcvtnh_u16_f16: case NEON::BI__builtin_neon_vcvtph_u16_f16: case NEON::BI__builtin_neon_vcvth_u16_f16: case NEON::BI__builtin_neon_vcvtah_s16_f16: case NEON::BI__builtin_neon_vcvtmh_s16_f16: case NEON::BI__builtin_neon_vcvtnh_s16_f16: case NEON::BI__builtin_neon_vcvtph_s16_f16: case NEON::BI__builtin_neon_vcvth_s16_f16: { unsigned Int; llvm::Type* InTy = Int32Ty; llvm::Type* FTy = HalfTy; llvm::Type *Tys[2] = {InTy, FTy}; Ops.push_back(EmitScalarExpr(E->getArg(0))); switch (BuiltinID) { default: llvm_unreachable("missing builtin ID in switch!"); case NEON::BI__builtin_neon_vcvtah_u16_f16: Int = Intrinsic::aarch64_neon_fcvtau; break; case NEON::BI__builtin_neon_vcvtmh_u16_f16: Int = Intrinsic::aarch64_neon_fcvtmu; break; case NEON::BI__builtin_neon_vcvtnh_u16_f16: Int = Intrinsic::aarch64_neon_fcvtnu; break; case NEON::BI__builtin_neon_vcvtph_u16_f16: Int = Intrinsic::aarch64_neon_fcvtpu; break; case NEON::BI__builtin_neon_vcvth_u16_f16: Int = Intrinsic::aarch64_neon_fcvtzu; break; case NEON::BI__builtin_neon_vcvtah_s16_f16: Int = Intrinsic::aarch64_neon_fcvtas; break; case NEON::BI__builtin_neon_vcvtmh_s16_f16: Int = Intrinsic::aarch64_neon_fcvtms; break; case NEON::BI__builtin_neon_vcvtnh_s16_f16: Int = Intrinsic::aarch64_neon_fcvtns; break; case NEON::BI__builtin_neon_vcvtph_s16_f16: Int = Intrinsic::aarch64_neon_fcvtps; break; case NEON::BI__builtin_neon_vcvth_s16_f16: Int = Intrinsic::aarch64_neon_fcvtzs; break; } Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vcaleh_f16: case NEON::BI__builtin_neon_vcalth_f16: case NEON::BI__builtin_neon_vcageh_f16: case NEON::BI__builtin_neon_vcagth_f16: { unsigned Int; llvm::Type* InTy = Int32Ty; llvm::Type* FTy = HalfTy; llvm::Type *Tys[2] = {InTy, FTy}; Ops.push_back(EmitScalarExpr(E->getArg(1))); switch (BuiltinID) { default: llvm_unreachable("missing builtin ID in switch!"); case NEON::BI__builtin_neon_vcageh_f16: Int = Intrinsic::aarch64_neon_facge; break; case NEON::BI__builtin_neon_vcagth_f16: Int = Intrinsic::aarch64_neon_facgt; break; case NEON::BI__builtin_neon_vcaleh_f16: Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break; case NEON::BI__builtin_neon_vcalth_f16: Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break; } Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vcvth_n_s16_f16: case NEON::BI__builtin_neon_vcvth_n_u16_f16: { unsigned Int; llvm::Type* InTy = Int32Ty; 
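    // Editorial note (assumption, not original text): the _n variants convert
    // through an i32-typed aarch64.neon.vcvtfp2fx[su] call that takes the
    // fractional-bit count as its second operand; the i32 result is truncated
    // back to i16 at the end of this case.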
    llvm::Type* FTy = HalfTy;
    llvm::Type *Tys[2] = {InTy, FTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_s16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
    case NEON::BI__builtin_neon_vcvth_n_u16_f16:
      Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
    }
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
    return Builder.CreateTrunc(Ops[0], Int16Ty);
  }
  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
    unsigned Int;
    llvm::Type* FTy = HalfTy;
    llvm::Type* InTy = Int32Ty;
    llvm::Type *Tys[2] = {FTy, InTy};
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    switch (BuiltinID) {
    default: llvm_unreachable("missing builtin ID in switch!");
    case NEON::BI__builtin_neon_vcvth_n_f16_s16:
      Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
      Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
      break;
    case NEON::BI__builtin_neon_vcvth_n_f16_u16:
      Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
      Ops[0] = Builder.CreateZExt(Ops[0], InTy);
      break;
    }
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
  }
  case NEON::BI__builtin_neon_vpaddd_s64: {
    auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2i64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2i64 into a scalar i64.
    return Builder.CreateAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpaddd_f64: {
    auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f64, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f64 into a scalar f64.
    return Builder.CreateFAdd(Op0, Op1, "vpaddd");
  }
  case NEON::BI__builtin_neon_vpadds_f32: {
    auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
    Value *Vec = EmitScalarExpr(E->getArg(0));
    // The vector is v2f32, so make sure it's bitcast to that.
    Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
    llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
    llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
    Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
    Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
    // Pairwise addition of a v2f32 into a scalar f32.
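    // Illustrative note (not from the original source): vpadds_f32(v) thus
    // becomes two extractelements plus a scalar fadd rather than a call to a
    // pairwise-add intrinsic, roughly
    //   %lane0 = extractelement <2 x float> %v, i64 0
    //   %lane1 = extractelement <2 x float> %v, i64 1
    //   %r = fadd float %lane0, %lane1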
return Builder.CreateFAdd(Op0, Op1, "vpaddd"); } case NEON::BI__builtin_neon_vceqzd_s64: case NEON::BI__builtin_neon_vceqzd_f64: case NEON::BI__builtin_neon_vceqzs_f32: case NEON::BI__builtin_neon_vceqzh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); case NEON::BI__builtin_neon_vcgezd_s64: case NEON::BI__builtin_neon_vcgezd_f64: case NEON::BI__builtin_neon_vcgezs_f32: case NEON::BI__builtin_neon_vcgezh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); case NEON::BI__builtin_neon_vclezd_s64: case NEON::BI__builtin_neon_vclezd_f64: case NEON::BI__builtin_neon_vclezs_f32: case NEON::BI__builtin_neon_vclezh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); case NEON::BI__builtin_neon_vcgtzd_s64: case NEON::BI__builtin_neon_vcgtzd_f64: case NEON::BI__builtin_neon_vcgtzs_f32: case NEON::BI__builtin_neon_vcgtzh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); case NEON::BI__builtin_neon_vcltzd_s64: case NEON::BI__builtin_neon_vcltzd_f64: case NEON::BI__builtin_neon_vcltzs_f32: case NEON::BI__builtin_neon_vcltzh_f16: Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitAArch64CompareBuiltinExpr( Ops[0], ConvertType(E->getCallReturnType(getContext())), ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); case NEON::BI__builtin_neon_vceqzd_u64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); Ops[0] = Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); } case NEON::BI__builtin_neon_vceqd_f64: case NEON::BI__builtin_neon_vcled_f64: case NEON::BI__builtin_neon_vcltd_f64: case NEON::BI__builtin_neon_vcged_f64: case NEON::BI__builtin_neon_vcgtd_f64: { llvm::CmpInst::Predicate P; switch (BuiltinID) { default: llvm_unreachable("missing builtin ID in switch!"); case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; } Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); if (P == llvm::FCmpInst::FCMP_OEQ) Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); else Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); } case NEON::BI__builtin_neon_vceqs_f32: case NEON::BI__builtin_neon_vcles_f32: case NEON::BI__builtin_neon_vclts_f32: case NEON::BI__builtin_neon_vcges_f32: case NEON::BI__builtin_neon_vcgts_f32: { llvm::CmpInst::Predicate P; switch (BuiltinID) { default: llvm_unreachable("missing builtin ID in switch!"); case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; 
case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; } Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); if (P == llvm::FCmpInst::FCMP_OEQ) Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); else Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); } case NEON::BI__builtin_neon_vceqh_f16: case NEON::BI__builtin_neon_vcleh_f16: case NEON::BI__builtin_neon_vclth_f16: case NEON::BI__builtin_neon_vcgeh_f16: case NEON::BI__builtin_neon_vcgth_f16: { llvm::CmpInst::Predicate P; switch (BuiltinID) { default: llvm_unreachable("missing builtin ID in switch!"); case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break; case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break; case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break; case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break; case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break; } Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy); if (P == llvm::FCmpInst::FCMP_OEQ) Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); else Ops[0] = Builder.CreateFCmpS(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd"); } case NEON::BI__builtin_neon_vceqd_s64: case NEON::BI__builtin_neon_vceqd_u64: case NEON::BI__builtin_neon_vcgtd_s64: case NEON::BI__builtin_neon_vcgtd_u64: case NEON::BI__builtin_neon_vcltd_s64: case NEON::BI__builtin_neon_vcltd_u64: case NEON::BI__builtin_neon_vcged_u64: case NEON::BI__builtin_neon_vcged_s64: case NEON::BI__builtin_neon_vcled_u64: case NEON::BI__builtin_neon_vcled_s64: { llvm::CmpInst::Predicate P; switch (BuiltinID) { default: llvm_unreachable("missing builtin ID in switch!"); case NEON::BI__builtin_neon_vceqd_s64: case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; } Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); } case NEON::BI__builtin_neon_vtstd_s64: case NEON::BI__builtin_neon_vtstd_u64: { Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], llvm::Constant::getNullValue(Int64Ty)); return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); } case NEON::BI__builtin_neon_vset_lane_i8: case NEON::BI__builtin_neon_vset_lane_i16: case 
NEON::BI__builtin_neon_vset_lane_i32: case NEON::BI__builtin_neon_vset_lane_i64: case NEON::BI__builtin_neon_vset_lane_bf16: case NEON::BI__builtin_neon_vset_lane_f32: case NEON::BI__builtin_neon_vsetq_lane_i8: case NEON::BI__builtin_neon_vsetq_lane_i16: case NEON::BI__builtin_neon_vsetq_lane_i32: case NEON::BI__builtin_neon_vsetq_lane_i64: case NEON::BI__builtin_neon_vsetq_lane_bf16: case NEON::BI__builtin_neon_vsetq_lane_f32: Ops.push_back(EmitScalarExpr(E->getArg(2))); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); case NEON::BI__builtin_neon_vset_lane_f64: // The vector type needs a cast for the v1f64 variant. Ops[1] = Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1)); Ops.push_back(EmitScalarExpr(E->getArg(2))); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); case NEON::BI__builtin_neon_vsetq_lane_f64: // The vector type needs a cast for the v2f64 variant. Ops[1] = Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2)); Ops.push_back(EmitScalarExpr(E->getArg(2))); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); case NEON::BI__builtin_neon_vget_lane_i8: case NEON::BI__builtin_neon_vdupb_lane_i8: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vgetq_lane_i8: case NEON::BI__builtin_neon_vdupb_laneq_i8: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_i16: case NEON::BI__builtin_neon_vduph_lane_i16: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vgetq_lane_i16: case NEON::BI__builtin_neon_vduph_laneq_i16: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_i32: case NEON::BI__builtin_neon_vdups_lane_i32: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vdups_lane_f32: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vdups_lane"); case NEON::BI__builtin_neon_vgetq_lane_i32: case NEON::BI__builtin_neon_vdups_laneq_i32: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_i64: case NEON::BI__builtin_neon_vdupd_lane_i64: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vdupd_lane_f64: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vdupd_lane"); case NEON::BI__builtin_neon_vgetq_lane_i64: case NEON::BI__builtin_neon_vdupd_laneq_i64: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2)); return 
Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vget_lane_f32: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vget_lane_f64: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); case NEON::BI__builtin_neon_vgetq_lane_f32: case NEON::BI__builtin_neon_vdups_laneq_f32: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vgetq_lane_f64: case NEON::BI__builtin_neon_vdupd_laneq_f64: Ops[0] = Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2)); return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); case NEON::BI__builtin_neon_vaddh_f16: Ops.push_back(EmitScalarExpr(E->getArg(1))); return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh"); case NEON::BI__builtin_neon_vsubh_f16: Ops.push_back(EmitScalarExpr(E->getArg(1))); return Builder.CreateFSub(Ops[0], Ops[1], "vsubh"); case NEON::BI__builtin_neon_vmulh_f16: Ops.push_back(EmitScalarExpr(E->getArg(1))); return Builder.CreateFMul(Ops[0], Ops[1], "vmulh"); case NEON::BI__builtin_neon_vdivh_f16: Ops.push_back(EmitScalarExpr(E->getArg(1))); return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh"); case NEON::BI__builtin_neon_vfmah_f16: // NEON intrinsic puts accumulator first, unlike the LLVM fma. return emitCallMaybeConstrainedFPBuiltin( *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy, {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]}); case NEON::BI__builtin_neon_vfmsh_f16: { Value* Neg = Builder.CreateFNeg(EmitScalarExpr(E->getArg(1)), "vsubh"); // NEON intrinsic puts accumulator first, unlike the LLVM fma. return emitCallMaybeConstrainedFPBuiltin( *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, HalfTy, {Neg, EmitScalarExpr(E->getArg(2)), Ops[0]}); } case NEON::BI__builtin_neon_vaddd_s64: case NEON::BI__builtin_neon_vaddd_u64: return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); case NEON::BI__builtin_neon_vsubd_s64: case NEON::BI__builtin_neon_vsubd_u64: return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); case NEON::BI__builtin_neon_vqdmlalh_s16: case NEON::BI__builtin_neon_vqdmlslh_s16: { SmallVector ProductOps; ProductOps.push_back(vectorWrapScalar16(Ops[1])); ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4); Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), ProductOps, "vqdmlXl"); Constant *CI = ConstantInt::get(SizeTy, 0); Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 ? 
                             Intrinsic::aarch64_neon_sqadd
                           : Intrinsic::aarch64_neon_sqsub;
    return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
  }
  case NEON::BI__builtin_neon_vqshlud_n_s64: {
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
                        Ops, "vqshlu_n");
  }
  case NEON::BI__builtin_neon_vqshld_n_u64:
  case NEON::BI__builtin_neon_vqshld_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
                       ? Intrinsic::aarch64_neon_uqshl
                       : Intrinsic::aarch64_neon_sqshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
  }
  case NEON::BI__builtin_neon_vrshrd_n_u64:
  case NEON::BI__builtin_neon_vrshrd_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
                       ? Intrinsic::aarch64_neon_urshl
                       : Intrinsic::aarch64_neon_srshl;
    Ops.push_back(EmitScalarExpr(E->getArg(1)));
    int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
    Ops[1] = ConstantInt::get(Int64Ty, -SV);
    return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
  }
  case NEON::BI__builtin_neon_vrsrad_n_u64:
  case NEON::BI__builtin_neon_vrsrad_n_s64: {
    unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
                       ? Intrinsic::aarch64_neon_urshl
                       : Intrinsic::aarch64_neon_srshl;
    Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
    Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
    Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
                                {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
    return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
  }
  case NEON::BI__builtin_neon_vshld_n_s64:
  case NEON::BI__builtin_neon_vshld_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateShl(
        Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    return Builder.CreateAShr(
        Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
  }
  case NEON::BI__builtin_neon_vshrd_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    if (ShiftAmt == 64)
      return ConstantInt::get(Int64Ty, 0);
    return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
                              "shrd_n");
  }
  case NEON::BI__builtin_neon_vsrad_n_s64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    Ops[1] = Builder.CreateAShr(
        Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
                                                   Amt->getZExtValue())),
        "shrd_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  }
  case NEON::BI__builtin_neon_vsrad_n_u64: {
    llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
    uint64_t ShiftAmt = Amt->getZExtValue();
    // Right-shifting an unsigned value by its size yields 0.
    // As Op + 0 = Op, return Ops[0] directly.
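    // Illustrative example (assumption, not original text): vsrad_n_u64(acc, x, 64)
    // therefore folds to just `acc`, with no shift or add emitted at all.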
if (ShiftAmt == 64) return Ops[0]; Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), "shrd_n"); return Builder.CreateAdd(Ops[0], Ops[1]); } case NEON::BI__builtin_neon_vqdmlalh_lane_s16: case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: case NEON::BI__builtin_neon_vqdmlslh_lane_s16: case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), "lane"); SmallVector ProductOps; ProductOps.push_back(vectorWrapScalar16(Ops[1])); ProductOps.push_back(vectorWrapScalar16(Ops[2])); auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4); Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), ProductOps, "vqdmlXl"); Constant *CI = ConstantInt::get(SizeTy, 0); Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); Ops.pop_back(); unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) ? Intrinsic::aarch64_neon_sqadd : Intrinsic::aarch64_neon_sqsub; return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); } case NEON::BI__builtin_neon_vqdmlals_s32: case NEON::BI__builtin_neon_vqdmlsls_s32: { SmallVector ProductOps; ProductOps.push_back(Ops[1]); ProductOps.push_back(EmitScalarExpr(E->getArg(2))); Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), ProductOps, "vqdmlXl"); unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 ? Intrinsic::aarch64_neon_sqadd : Intrinsic::aarch64_neon_sqsub; return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); } case NEON::BI__builtin_neon_vqdmlals_lane_s32: case NEON::BI__builtin_neon_vqdmlals_laneq_s32: case NEON::BI__builtin_neon_vqdmlsls_lane_s32: case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), "lane"); SmallVector ProductOps; ProductOps.push_back(Ops[1]); ProductOps.push_back(Ops[2]); Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), ProductOps, "vqdmlXl"); Ops.pop_back(); unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) ? Intrinsic::aarch64_neon_sqadd : Intrinsic::aarch64_neon_sqsub; return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); } case NEON::BI__builtin_neon_vget_lane_bf16: case NEON::BI__builtin_neon_vduph_lane_bf16: case NEON::BI__builtin_neon_vduph_lane_f16: { return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vget_lane"); } case NEON::BI__builtin_neon_vgetq_lane_bf16: case NEON::BI__builtin_neon_vduph_laneq_bf16: case NEON::BI__builtin_neon_vduph_laneq_f16: { return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), "vgetq_lane"); } case clang::AArch64::BI_InterlockedAdd: case clang::AArch64::BI_InterlockedAdd64: { Address DestAddr = CheckAtomicAlignment(*this, E); Value *Val = EmitScalarExpr(E->getArg(1)); AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Add, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent); return Builder.CreateAdd(RMWI, Val); } } llvm::FixedVectorType *VTy = GetNeonType(this, Type); llvm::Type *Ty = VTy; if (!Ty) return nullptr; // Not all intrinsics handled by the common case work for AArch64 yet, so only // defer to common code if it's been added to our special map. 
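  // Editorial sketch (assumption, not original text): AArch64SIMDIntrinsicMap
  // is a table sorted by builtin ID; findARMVectorIntrinsicInMap binary-searches
  // it and returns null when there is no entry, in which case the TBL helper and
  // the large switch below take over.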
Builtin = findARMVectorIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, AArch64SIMDIntrinsicsProvenSorted); if (Builtin) return EmitCommonNeonBuiltinExpr( Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, Builtin->NameHint, Builtin->TypeModifier, E, Ops, /*never use addresses*/ Address::invalid(), Address::invalid(), Arch); if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch)) return V; unsigned Int; switch (BuiltinID) { default: return nullptr; case NEON::BI__builtin_neon_vbsl_v: case NEON::BI__builtin_neon_vbslq_v: { llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); return Builder.CreateBitCast(Ops[0], Ty); } case NEON::BI__builtin_neon_vfma_lane_v: case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types // The ARM builtins (and instructions) have the addend as the first // operand, but the 'fma' intrinsics have it last. Swap it around here. Value *Addend = Ops[0]; Value *Multiplicand = Ops[1]; Value *LaneSource = Ops[2]; Ops[0] = Multiplicand; Ops[1] = LaneSource; Ops[2] = Addend; // Now adjust things to handle the lane access. auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? llvm::FixedVectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : VTy; llvm::Constant *cst = cast(Ops[3]); Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst); Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); Ops.pop_back(); Int = Builder.getIsFPConstrained() ? 
Intrinsic::experimental_constrained_fma : Intrinsic::fma; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); } case NEON::BI__builtin_neon_vfma_laneq_v: { auto *VTy = cast(Ty); // v1f64 fma should be mapped to Neon scalar f64 fma if (VTy && VTy->getElementType() == DoubleTy) { Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); llvm::FixedVectorType *VTy = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, true)); Ops[2] = Builder.CreateBitCast(Ops[2], VTy); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); Value *Result; Result = emitCallMaybeConstrainedFPBuiltin( *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, DoubleTy, {Ops[1], Ops[2], Ops[0]}); return Builder.CreateBitCast(Result, Ty); } Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); auto *STy = llvm::FixedVectorType::get(VTy->getElementType(), VTy->getNumElements() * 2); Ops[2] = Builder.CreateBitCast(Ops[2], STy); Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cast(Ops[3])); Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); return emitCallMaybeConstrainedFPBuiltin( *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, {Ops[2], Ops[1], Ops[0]}); } case NEON::BI__builtin_neon_vfmaq_laneq_v: { Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[2] = EmitNeonSplat(Ops[2], cast(Ops[3])); return emitCallMaybeConstrainedFPBuiltin( *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, {Ops[2], Ops[1], Ops[0]}); } case NEON::BI__builtin_neon_vfmah_lane_f16: case NEON::BI__builtin_neon_vfmas_lane_f32: case NEON::BI__builtin_neon_vfmah_laneq_f16: case NEON::BI__builtin_neon_vfmas_laneq_f32: case NEON::BI__builtin_neon_vfmad_lane_f64: case NEON::BI__builtin_neon_vfmad_laneq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(3))); llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); return emitCallMaybeConstrainedFPBuiltin( *this, Intrinsic::fma, Intrinsic::experimental_constrained_fma, Ty, {Ops[1], Ops[2], Ops[0]}); } case NEON::BI__builtin_neon_vmull_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); case NEON::BI__builtin_neon_vmax_v: case NEON::BI__builtin_neon_vmaxq_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); case NEON::BI__builtin_neon_vmaxh_f16: { Ops.push_back(EmitScalarExpr(E->getArg(1))); Int = Intrinsic::aarch64_neon_fmax; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax"); } case NEON::BI__builtin_neon_vmin_v: case NEON::BI__builtin_neon_vminq_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. Int = usgn ? 
Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); case NEON::BI__builtin_neon_vminh_f16: { Ops.push_back(EmitScalarExpr(E->getArg(1))); Int = Intrinsic::aarch64_neon_fmin; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin"); } case NEON::BI__builtin_neon_vabd_v: case NEON::BI__builtin_neon_vabdq_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); case NEON::BI__builtin_neon_vpadal_v: case NEON::BI__builtin_neon_vpadalq_v: { unsigned ArgElts = VTy->getNumElements(); llvm::IntegerType *EltTy = cast(VTy->getElementType()); unsigned BitWidth = EltTy->getBitWidth(); auto *ArgTy = llvm::FixedVectorType::get( llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts); llvm::Type* Tys[2] = { VTy, ArgTy }; Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; SmallVector TmpOps; TmpOps.push_back(Ops[1]); Function *F = CGM.getIntrinsic(Int, Tys); llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); return Builder.CreateAdd(tmp, addend); } case NEON::BI__builtin_neon_vpmin_v: case NEON::BI__builtin_neon_vpminq_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); case NEON::BI__builtin_neon_vpmax_v: case NEON::BI__builtin_neon_vpmaxq_v: // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. Int = usgn ? 
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); case NEON::BI__builtin_neon_vminnm_v: case NEON::BI__builtin_neon_vminnmq_v: Int = Intrinsic::aarch64_neon_fminnm; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); case NEON::BI__builtin_neon_vminnmh_f16: Ops.push_back(EmitScalarExpr(E->getArg(1))); Int = Intrinsic::aarch64_neon_fminnm; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm"); case NEON::BI__builtin_neon_vmaxnm_v: case NEON::BI__builtin_neon_vmaxnmq_v: Int = Intrinsic::aarch64_neon_fmaxnm; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); case NEON::BI__builtin_neon_vmaxnmh_f16: Ops.push_back(EmitScalarExpr(E->getArg(1))); Int = Intrinsic::aarch64_neon_fmaxnm; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm"); case NEON::BI__builtin_neon_vrecpss_f32: { Ops.push_back(EmitScalarExpr(E->getArg(1))); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), Ops, "vrecps"); } case NEON::BI__builtin_neon_vrecpsd_f64: Ops.push_back(EmitScalarExpr(E->getArg(1))); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), Ops, "vrecps"); case NEON::BI__builtin_neon_vrecpsh_f16: Ops.push_back(EmitScalarExpr(E->getArg(1))); return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy), Ops, "vrecps"); case NEON::BI__builtin_neon_vqshrun_n_v: Int = Intrinsic::aarch64_neon_sqshrun; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); case NEON::BI__builtin_neon_vqrshrun_n_v: Int = Intrinsic::aarch64_neon_sqrshrun; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); case NEON::BI__builtin_neon_vqshrn_n_v: Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); case NEON::BI__builtin_neon_vrshrn_n_v: Int = Intrinsic::aarch64_neon_rshrn; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); case NEON::BI__builtin_neon_vqrshrn_n_v: Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); case NEON::BI__builtin_neon_vrndah_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_round : Intrinsic::round; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda"); } case NEON::BI__builtin_neon_vrnda_v: case NEON::BI__builtin_neon_vrndaq_v: { Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_round : Intrinsic::round; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); } case NEON::BI__builtin_neon_vrndih_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_nearbyint : Intrinsic::nearbyint; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi"); } case NEON::BI__builtin_neon_vrndmh_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_floor : Intrinsic::floor; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm"); } case NEON::BI__builtin_neon_vrndm_v: case NEON::BI__builtin_neon_vrndmq_v: { Int = Builder.getIsFPConstrained() ? 
Intrinsic::experimental_constrained_floor : Intrinsic::floor; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); } case NEON::BI__builtin_neon_vrndnh_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_roundeven : Intrinsic::roundeven; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn"); } case NEON::BI__builtin_neon_vrndn_v: case NEON::BI__builtin_neon_vrndnq_v: { Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_roundeven : Intrinsic::roundeven; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); } case NEON::BI__builtin_neon_vrndns_f32: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_roundeven : Intrinsic::roundeven; return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn"); } case NEON::BI__builtin_neon_vrndph_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_ceil : Intrinsic::ceil; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp"); } case NEON::BI__builtin_neon_vrndp_v: case NEON::BI__builtin_neon_vrndpq_v: { Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_ceil : Intrinsic::ceil; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); } case NEON::BI__builtin_neon_vrndxh_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_rint : Intrinsic::rint; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx"); } case NEON::BI__builtin_neon_vrndx_v: case NEON::BI__builtin_neon_vrndxq_v: { Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_rint : Intrinsic::rint; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); } case NEON::BI__builtin_neon_vrndh_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_trunc : Intrinsic::trunc; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz"); } case NEON::BI__builtin_neon_vrnd32x_f32: case NEON::BI__builtin_neon_vrnd32xq_f32: case NEON::BI__builtin_neon_vrnd32x_f64: case NEON::BI__builtin_neon_vrnd32xq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint32x; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x"); } case NEON::BI__builtin_neon_vrnd32z_f32: case NEON::BI__builtin_neon_vrnd32zq_f32: case NEON::BI__builtin_neon_vrnd32z_f64: case NEON::BI__builtin_neon_vrnd32zq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint32z; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z"); } case NEON::BI__builtin_neon_vrnd64x_f32: case NEON::BI__builtin_neon_vrnd64xq_f32: case NEON::BI__builtin_neon_vrnd64x_f64: case NEON::BI__builtin_neon_vrnd64xq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint64x; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x"); } case NEON::BI__builtin_neon_vrnd64z_f32: case NEON::BI__builtin_neon_vrnd64zq_f32: case NEON::BI__builtin_neon_vrnd64z_f64: case NEON::BI__builtin_neon_vrnd64zq_f64: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Intrinsic::aarch64_neon_frint64z; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z"); } case NEON::BI__builtin_neon_vrnd_v: case NEON::BI__builtin_neon_vrndq_v: { Int = Builder.getIsFPConstrained() ? 
Intrinsic::experimental_constrained_trunc : Intrinsic::trunc; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); } case NEON::BI__builtin_neon_vcvt_f64_v: case NEON::BI__builtin_neon_vcvtq_f64_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); case NEON::BI__builtin_neon_vcvt_f64_f32: { assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && "unexpected vcvt_f64_f32 builtin"); NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); } case NEON::BI__builtin_neon_vcvt_f32_f64: { assert(Type.getEltType() == NeonTypeFlags::Float32 && "unexpected vcvt_f32_f64 builtin"); NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); } case NEON::BI__builtin_neon_vcvt_s32_v: case NEON::BI__builtin_neon_vcvt_u32_v: case NEON::BI__builtin_neon_vcvt_s64_v: case NEON::BI__builtin_neon_vcvt_u64_v: case NEON::BI__builtin_neon_vcvt_s16_f16: case NEON::BI__builtin_neon_vcvt_u16_f16: case NEON::BI__builtin_neon_vcvtq_s32_v: case NEON::BI__builtin_neon_vcvtq_u32_v: case NEON::BI__builtin_neon_vcvtq_s64_v: case NEON::BI__builtin_neon_vcvtq_u64_v: case NEON::BI__builtin_neon_vcvtq_s16_f16: case NEON::BI__builtin_neon_vcvtq_u16_f16: { Int = usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs; llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)}; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz"); } case NEON::BI__builtin_neon_vcvta_s16_f16: case NEON::BI__builtin_neon_vcvta_u16_f16: case NEON::BI__builtin_neon_vcvta_s32_v: case NEON::BI__builtin_neon_vcvtaq_s16_f16: case NEON::BI__builtin_neon_vcvtaq_s32_v: case NEON::BI__builtin_neon_vcvta_u32_v: case NEON::BI__builtin_neon_vcvtaq_u16_f16: case NEON::BI__builtin_neon_vcvtaq_u32_v: case NEON::BI__builtin_neon_vcvta_s64_v: case NEON::BI__builtin_neon_vcvtaq_s64_v: case NEON::BI__builtin_neon_vcvta_u64_v: case NEON::BI__builtin_neon_vcvtaq_u64_v: { Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); } case NEON::BI__builtin_neon_vcvtm_s16_f16: case NEON::BI__builtin_neon_vcvtm_s32_v: case NEON::BI__builtin_neon_vcvtmq_s16_f16: case NEON::BI__builtin_neon_vcvtmq_s32_v: case NEON::BI__builtin_neon_vcvtm_u16_f16: case NEON::BI__builtin_neon_vcvtm_u32_v: case NEON::BI__builtin_neon_vcvtmq_u16_f16: case NEON::BI__builtin_neon_vcvtmq_u32_v: case NEON::BI__builtin_neon_vcvtm_s64_v: case NEON::BI__builtin_neon_vcvtmq_s64_v: case NEON::BI__builtin_neon_vcvtm_u64_v: case NEON::BI__builtin_neon_vcvtmq_u64_v: { Int = usgn ? 
Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); } case NEON::BI__builtin_neon_vcvtn_s16_f16: case NEON::BI__builtin_neon_vcvtn_s32_v: case NEON::BI__builtin_neon_vcvtnq_s16_f16: case NEON::BI__builtin_neon_vcvtnq_s32_v: case NEON::BI__builtin_neon_vcvtn_u16_f16: case NEON::BI__builtin_neon_vcvtn_u32_v: case NEON::BI__builtin_neon_vcvtnq_u16_f16: case NEON::BI__builtin_neon_vcvtnq_u32_v: case NEON::BI__builtin_neon_vcvtn_s64_v: case NEON::BI__builtin_neon_vcvtnq_s64_v: case NEON::BI__builtin_neon_vcvtn_u64_v: case NEON::BI__builtin_neon_vcvtnq_u64_v: { Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); } case NEON::BI__builtin_neon_vcvtp_s16_f16: case NEON::BI__builtin_neon_vcvtp_s32_v: case NEON::BI__builtin_neon_vcvtpq_s16_f16: case NEON::BI__builtin_neon_vcvtpq_s32_v: case NEON::BI__builtin_neon_vcvtp_u16_f16: case NEON::BI__builtin_neon_vcvtp_u32_v: case NEON::BI__builtin_neon_vcvtpq_u16_f16: case NEON::BI__builtin_neon_vcvtpq_u32_v: case NEON::BI__builtin_neon_vcvtp_s64_v: case NEON::BI__builtin_neon_vcvtpq_s64_v: case NEON::BI__builtin_neon_vcvtp_u64_v: case NEON::BI__builtin_neon_vcvtpq_u64_v: { Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); } case NEON::BI__builtin_neon_vmulx_v: case NEON::BI__builtin_neon_vmulxq_v: { Int = Intrinsic::aarch64_neon_fmulx; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); } case NEON::BI__builtin_neon_vmulxh_lane_f16: case NEON::BI__builtin_neon_vmulxh_laneq_f16: { // vmulx_lane should be mapped to Neon scalar mulx after // extracting the scalar element Ops.push_back(EmitScalarExpr(E->getArg(2))); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); Ops.pop_back(); Int = Intrinsic::aarch64_neon_fmulx; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx"); } case NEON::BI__builtin_neon_vmul_lane_v: case NEON::BI__builtin_neon_vmul_laneq_v: { // v1f64 vmul_lane should be mapped to Neon scalar mul lane bool Quad = false; if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) Quad = true; Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); llvm::FixedVectorType *VTy = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); Ops[1] = Builder.CreateBitCast(Ops[1], VTy); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); return Builder.CreateBitCast(Result, Ty); } case NEON::BI__builtin_neon_vnegd_s64: return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); case NEON::BI__builtin_neon_vnegh_f16: return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh"); case NEON::BI__builtin_neon_vpmaxnm_v: case NEON::BI__builtin_neon_vpmaxnmq_v: { Int = Intrinsic::aarch64_neon_fmaxnmp; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); } case NEON::BI__builtin_neon_vpminnm_v: case NEON::BI__builtin_neon_vpminnmq_v: { Int = Intrinsic::aarch64_neon_fminnmp; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); } case NEON::BI__builtin_neon_vsqrth_f16: { Ops.push_back(EmitScalarExpr(E->getArg(0))); Int = Builder.getIsFPConstrained() ? 
Intrinsic::experimental_constrained_sqrt : Intrinsic::sqrt; return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt"); } case NEON::BI__builtin_neon_vsqrt_v: case NEON::BI__builtin_neon_vsqrtq_v: { Int = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_sqrt : Intrinsic::sqrt; Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); } case NEON::BI__builtin_neon_vrbit_v: case NEON::BI__builtin_neon_vrbitq_v: { Int = Intrinsic::bitreverse; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); } case NEON::BI__builtin_neon_vaddv_u8: // FIXME: These are handled by the AArch64 scalar code. usgn = true; [[fallthrough]]; case NEON::BI__builtin_neon_vaddv_s8: { Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vaddv_u16: usgn = true; [[fallthrough]]; case NEON::BI__builtin_neon_vaddv_s16: { Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vaddvq_u8: usgn = true; [[fallthrough]]; case NEON::BI__builtin_neon_vaddvq_s8: { Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vaddvq_u16: usgn = true; [[fallthrough]]; case NEON::BI__builtin_neon_vaddvq_s16: { Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vmaxv_u8: { Int = Intrinsic::aarch64_neon_umaxv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vmaxv_u16: { Int = Intrinsic::aarch64_neon_umaxv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vmaxvq_u8: { Int = Intrinsic::aarch64_neon_umaxv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vmaxvq_u16: { Int = Intrinsic::aarch64_neon_umaxv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vmaxv_s8: { Int = Intrinsic::aarch64_neon_smaxv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vmaxv_s16: { Int = Intrinsic::aarch64_neon_smaxv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vmaxvq_s8: { Int = Intrinsic::aarch64_neon_smaxv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vmaxvq_s16: { Int = Intrinsic::aarch64_neon_smaxv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vmaxv_f16: { Int = Intrinsic::aarch64_neon_fmaxv; Ty = HalfTy; VTy = llvm::FixedVectorType::get(HalfTy, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return Builder.CreateTrunc(Ops[0], HalfTy); } case NEON::BI__builtin_neon_vmaxvq_f16: { Int = Intrinsic::aarch64_neon_fmaxv; Ty = HalfTy; VTy = llvm::FixedVectorType::get(HalfTy, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); return 
Builder.CreateTrunc(Ops[0], HalfTy); } case NEON::BI__builtin_neon_vminv_u8: { Int = Intrinsic::aarch64_neon_uminv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vminv_u16: { Int = Intrinsic::aarch64_neon_uminv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vminvq_u8: { Int = Intrinsic::aarch64_neon_uminv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vminvq_u16: { Int = Intrinsic::aarch64_neon_uminv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vminv_s8: { Int = Intrinsic::aarch64_neon_sminv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vminv_s16: { Int = Intrinsic::aarch64_neon_sminv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vminvq_s8: { Int = Intrinsic::aarch64_neon_sminv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); return Builder.CreateTrunc(Ops[0], Int8Ty); } case NEON::BI__builtin_neon_vminvq_s16: { Int = Intrinsic::aarch64_neon_sminv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vminv_f16: { Int = Intrinsic::aarch64_neon_fminv; Ty = HalfTy; VTy = llvm::FixedVectorType::get(HalfTy, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); return Builder.CreateTrunc(Ops[0], HalfTy); } case NEON::BI__builtin_neon_vminvq_f16: { Int = Intrinsic::aarch64_neon_fminv; Ty = HalfTy; VTy = llvm::FixedVectorType::get(HalfTy, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); return Builder.CreateTrunc(Ops[0], HalfTy); } case NEON::BI__builtin_neon_vmaxnmv_f16: { Int = Intrinsic::aarch64_neon_fmaxnmv; Ty = HalfTy; VTy = llvm::FixedVectorType::get(HalfTy, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = 
EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); return Builder.CreateTrunc(Ops[0], HalfTy); } case NEON::BI__builtin_neon_vmaxnmvq_f16: { Int = Intrinsic::aarch64_neon_fmaxnmv; Ty = HalfTy; VTy = llvm::FixedVectorType::get(HalfTy, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv"); return Builder.CreateTrunc(Ops[0], HalfTy); } case NEON::BI__builtin_neon_vminnmv_f16: { Int = Intrinsic::aarch64_neon_fminnmv; Ty = HalfTy; VTy = llvm::FixedVectorType::get(HalfTy, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); return Builder.CreateTrunc(Ops[0], HalfTy); } case NEON::BI__builtin_neon_vminnmvq_f16: { Int = Intrinsic::aarch64_neon_fminnmv; Ty = HalfTy; VTy = llvm::FixedVectorType::get(HalfTy, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); return Builder.CreateTrunc(Ops[0], HalfTy); } case NEON::BI__builtin_neon_vmul_n_f64: { Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); return Builder.CreateFMul(Ops[0], RHS); } case NEON::BI__builtin_neon_vaddlv_u8: { Int = Intrinsic::aarch64_neon_uaddlv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vaddlv_u16: { Int = Intrinsic::aarch64_neon_uaddlv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); } case NEON::BI__builtin_neon_vaddlvq_u8: { Int = Intrinsic::aarch64_neon_uaddlv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vaddlvq_u16: { Int = Intrinsic::aarch64_neon_uaddlv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); } case NEON::BI__builtin_neon_vaddlv_s8: { Int = Intrinsic::aarch64_neon_saddlv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vaddlv_s16: { Int = Intrinsic::aarch64_neon_saddlv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 4); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); } case NEON::BI__builtin_neon_vaddlvq_s8: { Int = Intrinsic::aarch64_neon_saddlv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int8Ty, 16); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); return Builder.CreateTrunc(Ops[0], Int16Ty); } case NEON::BI__builtin_neon_vaddlvq_s16: { Int = 
Intrinsic::aarch64_neon_saddlv; Ty = Int32Ty; VTy = llvm::FixedVectorType::get(Int16Ty, 8); llvm::Type *Tys[2] = { Ty, VTy }; Ops.push_back(EmitScalarExpr(E->getArg(0))); return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); } case NEON::BI__builtin_neon_vsri_n_v: case NEON::BI__builtin_neon_vsriq_n_v: { Int = Intrinsic::aarch64_neon_vsri; llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); return EmitNeonCall(Intrin, Ops, "vsri_n"); } case NEON::BI__builtin_neon_vsli_n_v: case NEON::BI__builtin_neon_vsliq_n_v: { Int = Intrinsic::aarch64_neon_vsli; llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); return EmitNeonCall(Intrin, Ops, "vsli_n"); } case NEON::BI__builtin_neon_vsra_n_v: case NEON::BI__builtin_neon_vsraq_n_v: Ops[0] = Builder.CreateBitCast(Ops[0], Ty); Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); return Builder.CreateAdd(Ops[0], Ops[1]); case NEON::BI__builtin_neon_vrsra_n_v: case NEON::BI__builtin_neon_vrsraq_n_v: { Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; SmallVector TmpOps; TmpOps.push_back(Ops[1]); TmpOps.push_back(Ops[2]); Function* F = CGM.getIntrinsic(Int, Ty); llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); Ops[0] = Builder.CreateBitCast(Ops[0], VTy); return Builder.CreateAdd(Ops[0], tmp); } case NEON::BI__builtin_neon_vld1_v: case NEON::BI__builtin_neon_vld1q_v: { return Builder.CreateAlignedLoad(VTy, Ops[0], PtrOp0.getAlignment()); } case NEON::BI__builtin_neon_vst1_v: case NEON::BI__builtin_neon_vst1q_v: Ops[1] = Builder.CreateBitCast(Ops[1], VTy); return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); case NEON::BI__builtin_neon_vld1_lane_v: case NEON::BI__builtin_neon_vld1q_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], PtrOp0.getAlignment()); return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); } case NEON::BI__builtin_neon_vldap1_lane_s64: case NEON::BI__builtin_neon_vldap1q_lane_s64: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); llvm::LoadInst *LI = Builder.CreateAlignedLoad( VTy->getElementType(), Ops[0], PtrOp0.getAlignment()); LI->setAtomic(llvm::AtomicOrdering::Acquire); Ops[0] = LI; return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vldap1_lane"); } case NEON::BI__builtin_neon_vld1_dup_v: case NEON::BI__builtin_neon_vld1q_dup_v: { Value *V = PoisonValue::get(Ty); Ops[0] = Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], PtrOp0.getAlignment()); llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); return EmitNeonSplat(Ops[0], CI); } case NEON::BI__builtin_neon_vst1_lane_v: case NEON::BI__builtin_neon_vst1q_lane_v: Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); return Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); case NEON::BI__builtin_neon_vstl1_lane_s64: case NEON::BI__builtin_neon_vstl1q_lane_s64: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); llvm::StoreInst *SI = Builder.CreateAlignedStore(Ops[1], Ops[0], PtrOp0.getAlignment()); SI->setAtomic(llvm::AtomicOrdering::Release); return SI; } case NEON::BI__builtin_neon_vld2_v: case NEON::BI__builtin_neon_vld2q_v: { llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case 
NEON::BI__builtin_neon_vld3_v: case NEON::BI__builtin_neon_vld3q_v: { llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld4_v: case NEON::BI__builtin_neon_vld4q_v: { llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld2_dup_v: case NEON::BI__builtin_neon_vld2q_dup_v: { llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld3_dup_v: case NEON::BI__builtin_neon_vld3q_dup_v: { llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld4_dup_v: case NEON::BI__builtin_neon_vld4q_dup_v: { llvm::Type *Tys[2] = {VTy, UnqualPtrTy}; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld2_lane_v: case NEON::BI__builtin_neon_vld2q_lane_v: { llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld2_lane"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld3_lane_v: case NEON::BI__builtin_neon_vld3q_lane_v: { llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateBitCast(Ops[3], Ty); Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld3_lane"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vld4_lane_v: case NEON::BI__builtin_neon_vld4q_lane_v: { llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); std::rotate(Ops.begin() + 1, Ops.begin() + 2, Ops.end()); Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Ops[3] = Builder.CreateBitCast(Ops[3], Ty); Ops[4] = Builder.CreateBitCast(Ops[4], Ty); Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); Ops[1] = Builder.CreateCall(F, ArrayRef(Ops).slice(1), "vld4_lane"); return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); } case NEON::BI__builtin_neon_vst2_v: case NEON::BI__builtin_neon_vst2q_v: { std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), Ops, ""); } case NEON::BI__builtin_neon_vst2_lane_v: case NEON::BI__builtin_neon_vst2q_lane_v: { std::rotate(Ops.begin(), 
Ops.begin() + 1, Ops.end()); Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), Ops, ""); } case NEON::BI__builtin_neon_vst3_v: case NEON::BI__builtin_neon_vst3q_v: { std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), Ops, ""); } case NEON::BI__builtin_neon_vst3_lane_v: case NEON::BI__builtin_neon_vst3q_lane_v: { std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), Ops, ""); } case NEON::BI__builtin_neon_vst4_v: case NEON::BI__builtin_neon_vst4q_v: { std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), Ops, ""); } case NEON::BI__builtin_neon_vst4_lane_v: case NEON::BI__builtin_neon_vst4q_lane_v: { std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), Ops, ""); } case NEON::BI__builtin_neon_vtrn_v: case NEON::BI__builtin_neon_vtrnq_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back(i+vi); Indices.push_back(i+e+vi); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } case NEON::BI__builtin_neon_vuzp_v: case NEON::BI__builtin_neon_vuzpq_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) Indices.push_back(2*i+vi); Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } case NEON::BI__builtin_neon_vzip_v: case NEON::BI__builtin_neon_vzipq_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[2] = Builder.CreateBitCast(Ops[2], Ty); Value *SV = nullptr; for (unsigned vi = 0; vi != 2; ++vi) { SmallVector Indices; for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { Indices.push_back((i + vi*e) >> 1); Indices.push_back(((i + vi*e) >> 1)+e); } Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); SV = Builder.CreateDefaultAlignedStore(SV, Addr); } return SV; } case NEON::BI__builtin_neon_vqtbl1q_v: { return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), Ops, "vtbl1"); } case NEON::BI__builtin_neon_vqtbl2q_v: { return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), Ops, "vtbl2"); } case NEON::BI__builtin_neon_vqtbl3q_v: { return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), Ops, "vtbl3"); } case NEON::BI__builtin_neon_vqtbl4q_v: { return 
EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), Ops, "vtbl4"); } case NEON::BI__builtin_neon_vqtbx1q_v: { return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), Ops, "vtbx1"); } case NEON::BI__builtin_neon_vqtbx2q_v: { return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), Ops, "vtbx2"); } case NEON::BI__builtin_neon_vqtbx3q_v: { return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), Ops, "vtbx3"); } case NEON::BI__builtin_neon_vqtbx4q_v: { return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), Ops, "vtbx4"); } case NEON::BI__builtin_neon_vsqadd_v: case NEON::BI__builtin_neon_vsqaddq_v: { Int = Intrinsic::aarch64_neon_usqadd; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); } case NEON::BI__builtin_neon_vuqadd_v: case NEON::BI__builtin_neon_vuqaddq_v: { Int = Intrinsic::aarch64_neon_suqadd; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); } } } Value *CodeGenFunction::EmitBPFBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { assert((BuiltinID == BPF::BI__builtin_preserve_field_info || BuiltinID == BPF::BI__builtin_btf_type_id || BuiltinID == BPF::BI__builtin_preserve_type_info || BuiltinID == BPF::BI__builtin_preserve_enum_value) && "unexpected BPF builtin"); // A sequence number, injected into IR builtin functions, to // prevent CSE given the only difference of the function // may just be the debuginfo metadata. static uint32_t BuiltinSeqNum; switch (BuiltinID) { default: llvm_unreachable("Unexpected BPF builtin"); case BPF::BI__builtin_preserve_field_info: { const Expr *Arg = E->getArg(0); bool IsBitField = Arg->IgnoreParens()->getObjectKind() == OK_BitField; if (!getDebugInfo()) { CGM.Error(E->getExprLoc(), "using __builtin_preserve_field_info() without -g"); return IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this) : EmitLValue(Arg).emitRawPointer(*this); } // Enable underlying preserve_*_access_index() generation. bool OldIsInPreservedAIRegion = IsInPreservedAIRegion; IsInPreservedAIRegion = true; Value *FieldAddr = IsBitField ? EmitLValue(Arg).getRawBitFieldPointer(*this) : EmitLValue(Arg).emitRawPointer(*this); IsInPreservedAIRegion = OldIsInPreservedAIRegion; ConstantInt *C = cast(EmitScalarExpr(E->getArg(1))); Value *InfoKind = ConstantInt::get(Int64Ty, C->getSExtValue()); // Built the IR for the preserve_field_info intrinsic. 
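// A rough illustration of the call built below (hedged; %field.addr and
// <kind> are placeholder names, assuming opaque pointers):
//   %info = call i32 @llvm.bpf.preserve.field.info.p0(ptr %field.addr, i64 <kind>)
// where %field.addr was emitted while IsInPreservedAIRegion was set, so the
// enclosing preserve_*_access_index() chain is recorded for BPF CO-RE.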
llvm::Function *FnGetFieldInfo = llvm::Intrinsic::getDeclaration( &CGM.getModule(), llvm::Intrinsic::bpf_preserve_field_info, {FieldAddr->getType()}); return Builder.CreateCall(FnGetFieldInfo, {FieldAddr, InfoKind}); } case BPF::BI__builtin_btf_type_id: case BPF::BI__builtin_preserve_type_info: { if (!getDebugInfo()) { CGM.Error(E->getExprLoc(), "using builtin function without -g"); return nullptr; } const Expr *Arg0 = E->getArg(0); llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType( Arg0->getType(), Arg0->getExprLoc()); ConstantInt *Flag = cast(EmitScalarExpr(E->getArg(1))); Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue()); Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++); llvm::Function *FnDecl; if (BuiltinID == BPF::BI__builtin_btf_type_id) FnDecl = llvm::Intrinsic::getDeclaration( &CGM.getModule(), llvm::Intrinsic::bpf_btf_type_id, {}); else FnDecl = llvm::Intrinsic::getDeclaration( &CGM.getModule(), llvm::Intrinsic::bpf_preserve_type_info, {}); CallInst *Fn = Builder.CreateCall(FnDecl, {SeqNumVal, FlagValue}); Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); return Fn; } case BPF::BI__builtin_preserve_enum_value: { if (!getDebugInfo()) { CGM.Error(E->getExprLoc(), "using builtin function without -g"); return nullptr; } const Expr *Arg0 = E->getArg(0); llvm::DIType *DbgInfo = getDebugInfo()->getOrCreateStandaloneType( Arg0->getType(), Arg0->getExprLoc()); // Find enumerator const auto *UO = cast(Arg0->IgnoreParens()); const auto *CE = cast(UO->getSubExpr()); const auto *DR = cast(CE->getSubExpr()); const auto *Enumerator = cast(DR->getDecl()); auto InitVal = Enumerator->getInitVal(); std::string InitValStr; if (InitVal.isNegative() || InitVal > uint64_t(INT64_MAX)) InitValStr = std::to_string(InitVal.getSExtValue()); else InitValStr = std::to_string(InitVal.getZExtValue()); std::string EnumStr = Enumerator->getNameAsString() + ":" + InitValStr; Value *EnumStrVal = Builder.CreateGlobalStringPtr(EnumStr); ConstantInt *Flag = cast(EmitScalarExpr(E->getArg(1))); Value *FlagValue = ConstantInt::get(Int64Ty, Flag->getSExtValue()); Value *SeqNumVal = ConstantInt::get(Int32Ty, BuiltinSeqNum++); llvm::Function *IntrinsicFn = llvm::Intrinsic::getDeclaration( &CGM.getModule(), llvm::Intrinsic::bpf_preserve_enum_value, {}); CallInst *Fn = Builder.CreateCall(IntrinsicFn, {SeqNumVal, EnumStrVal, FlagValue}); Fn->setMetadata(LLVMContext::MD_preserve_access_index, DbgInfo); return Fn; } } } llvm::Value *CodeGenFunction:: BuildVector(ArrayRef Ops) { assert((Ops.size() & (Ops.size() - 1)) == 0 && "Not a power-of-two sized vector!"); bool AllConstants = true; for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) AllConstants &= isa(Ops[i]); // If this is a constant vector, create a ConstantVector. if (AllConstants) { SmallVector CstOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) CstOps.push_back(cast(Ops[i])); return llvm::ConstantVector::get(CstOps); } // Otherwise, insertelement the values to build the vector. Value *Result = llvm::PoisonValue::get( llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size())); for (unsigned i = 0, e = Ops.size(); i != e; ++i) Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt64(i)); return Result; } // Convert the mask from an integer type to a vector of i1. 
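// For example (sketch; %mask is a placeholder): an i8 mask paired with a
// 4-element vector is expected to become
//   %vec = bitcast i8 %mask to <8 x i1>
//   %ext = shufflevector <8 x i1> %vec, <8 x i1> %vec, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// i.e. a full-width bitcast followed by an "extract" shuffle down to NumElts.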
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask, unsigned NumElts) { auto *MaskTy = llvm::FixedVectorType::get( CGF.Builder.getInt1Ty(), cast(Mask->getType())->getBitWidth()); Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy); // If we have less than 8 elements, then the starting mask was an i8 and // we need to extract down to the right number of elements. if (NumElts < 8) { int Indices[4]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; MaskVec = CGF.Builder.CreateShuffleVector( MaskVec, MaskVec, ArrayRef(Indices, NumElts), "extract"); } return MaskVec; } static Value *EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef Ops, Align Alignment) { Value *Ptr = Ops[0]; Value *MaskVec = getMaskVecValue( CGF, Ops[2], cast(Ops[1]->getType())->getNumElements()); return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Alignment, MaskVec); } static Value *EmitX86MaskedLoad(CodeGenFunction &CGF, ArrayRef Ops, Align Alignment) { llvm::Type *Ty = Ops[1]->getType(); Value *Ptr = Ops[0]; Value *MaskVec = getMaskVecValue( CGF, Ops[2], cast(Ty)->getNumElements()); return CGF.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, MaskVec, Ops[1]); } static Value *EmitX86ExpandLoad(CodeGenFunction &CGF, ArrayRef Ops) { auto *ResultTy = cast(Ops[1]->getType()); Value *Ptr = Ops[0]; Value *MaskVec = getMaskVecValue( CGF, Ops[2], cast(ResultTy)->getNumElements()); llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload, ResultTy); return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] }); } static Value *EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef Ops, bool IsCompress) { auto *ResultTy = cast(Ops[1]->getType()); Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements()); Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress : Intrinsic::x86_avx512_mask_expand; llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy); return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec }); } static Value *EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef Ops) { auto *ResultTy = cast(Ops[1]->getType()); Value *Ptr = Ops[0]; Value *MaskVec = getMaskVecValue(CGF, Ops[2], ResultTy->getNumElements()); llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore, ResultTy); return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec }); } static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef Ops, bool InvertLHS = false) { unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts); Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts); if (InvertLHS) LHS = CGF.Builder.CreateNot(LHS); return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS), Ops[0]->getType()); } static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight) { llvm::Type *Ty = Op0->getType(); // Amount may be scalar immediate, in which case create a splat vector. // Funnel shifts amounts are treated as modulo and types are all power-of-2 so // we only care about the lowest log2 bits anyway. if (Amt->getType() != Ty) { unsigned NumElts = cast(Ty)->getNumElements(); Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false); Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt); } unsigned IID = IsRight ? 
Intrinsic::fshr : Intrinsic::fshl; Function *F = CGF.CGM.getIntrinsic(IID, Ty); return CGF.Builder.CreateCall(F, {Op0, Op1, Amt}); } static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef Ops, bool IsSigned) { Value *Op0 = Ops[0]; Value *Op1 = Ops[1]; llvm::Type *Ty = Op0->getType(); uint64_t Imm = cast(Ops[2])->getZExtValue() & 0x7; CmpInst::Predicate Pred; switch (Imm) { case 0x0: Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; case 0x1: Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; case 0x2: Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; case 0x3: Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; case 0x4: Pred = ICmpInst::ICMP_EQ; break; case 0x5: Pred = ICmpInst::ICMP_NE; break; case 0x6: return llvm::Constant::getNullValue(Ty); // FALSE case 0x7: return llvm::Constant::getAllOnesValue(Ty); // TRUE default: llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate"); } Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1); Value *Res = CGF.Builder.CreateSExt(Cmp, Ty); return Res; } static Value *EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1) { // If the mask is all ones just return first argument. if (const auto *C = dyn_cast(Mask)) if (C->isAllOnesValue()) return Op0; Mask = getMaskVecValue( CGF, Mask, cast(Op0->getType())->getNumElements()); return CGF.Builder.CreateSelect(Mask, Op0, Op1); } static Value *EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1) { // If the mask is all ones just return first argument. if (const auto *C = dyn_cast(Mask)) if (C->isAllOnesValue()) return Op0; auto *MaskTy = llvm::FixedVectorType::get( CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth()); Mask = CGF.Builder.CreateBitCast(Mask, MaskTy); Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0); return CGF.Builder.CreateSelect(Mask, Op0, Op1); } static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn) { if (MaskIn) { const auto *C = dyn_cast(MaskIn); if (!C || !C->isAllOnesValue()) Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts)); } if (NumElts < 8) { int Indices[8]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; for (unsigned i = NumElts; i != 8; ++i) Indices[i] = i % NumElts + NumElts; Cmp = CGF.Builder.CreateShuffleVector( Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices); } return CGF.Builder.CreateBitCast(Cmp, IntegerType::get(CGF.getLLVMContext(), std::max(NumElts, 8U))); } static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef Ops) { assert((Ops.size() == 2 || Ops.size() == 4) && "Unexpected number of arguments"); unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); Value *Cmp; if (CC == 3) { Cmp = Constant::getNullValue( llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts)); } else if (CC == 7) { Cmp = Constant::getAllOnesValue( llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts)); } else { ICmpInst::Predicate Pred; switch (CC) { default: llvm_unreachable("Unknown condition code"); case 0: Pred = ICmpInst::ICMP_EQ; break; case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break; case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break; case 4: Pred = ICmpInst::ICMP_NE; break; case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break; case 6: Pred = Signed ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break; } Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]); } Value *MaskIn = nullptr; if (Ops.size() == 4) MaskIn = Ops[3]; return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn); } static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) { Value *Zero = Constant::getNullValue(In->getType()); return EmitX86MaskedCompare(CGF, 1, true, { In, Zero }); } static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF, const CallExpr *E, ArrayRef Ops, bool IsSigned) { unsigned Rnd = cast(Ops[3])->getZExtValue(); llvm::Type *Ty = Ops[1]->getType(); Value *Res; if (Rnd != 4) { Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round : Intrinsic::x86_avx512_uitofp_round; Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() }); Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] }); } else { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty) : CGF.Builder.CreateUIToFP(Ops[0], Ty); } return EmitX86Select(CGF, Ops[2], Res, Ops[1]); } // Lowers X86 FMA intrinsics to IR. static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef Ops, unsigned BuiltinID, bool IsAddSub) { bool Subtract = false; Intrinsic::ID IID = Intrinsic::not_intrinsic; switch (BuiltinID) { default: break; case clang::X86::BI__builtin_ia32_vfmsubph512_mask3: Subtract = true; [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddph512_mask: case clang::X86::BI__builtin_ia32_vfmaddph512_maskz: case clang::X86::BI__builtin_ia32_vfmaddph512_mask3: IID = llvm::Intrinsic::x86_avx512fp16_vfmadd_ph_512; break; case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3: Subtract = true; [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask: case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz: case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3: IID = llvm::Intrinsic::x86_avx512fp16_vfmaddsub_ph_512; break; case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: Subtract = true; [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddps512_mask: case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break; case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: Subtract = true; [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break; case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: Subtract = true; [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512; break; case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: Subtract = true; [[fallthrough]]; case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512; break; } Value *A = Ops[0]; Value *B = Ops[1]; Value *C = Ops[2]; if (Subtract) C = CGF.Builder.CreateFNeg(C); Value *Res; // Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding). 
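// Sketch of the two paths below (operand names are placeholders): with a
// non-default rounding mode, or for the *addsub forms, the target intrinsic
// is kept, e.g.
//   call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i32 <rnd>)
// otherwise the generic FMA is used,
//   call <16 x float> @llvm.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
// (or llvm.experimental.constrained.fma under strict FP), with any mask
// applied afterwards through a select.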
if (IID != Intrinsic::not_intrinsic && (cast(Ops.back())->getZExtValue() != (uint64_t)4 || IsAddSub)) { Function *Intr = CGF.CGM.getIntrinsic(IID); Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() }); } else { llvm::Type *Ty = A->getType(); Function *FMA; if (CGF.Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); FMA = CGF.CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, Ty); Res = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, C}); } else { FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); Res = CGF.Builder.CreateCall(FMA, {A, B, C}); } } // Handle any required masking. Value *MaskFalseVal = nullptr; switch (BuiltinID) { case clang::X86::BI__builtin_ia32_vfmaddph512_mask: case clang::X86::BI__builtin_ia32_vfmaddps512_mask: case clang::X86::BI__builtin_ia32_vfmaddpd512_mask: case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask: case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask: case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask: MaskFalseVal = Ops[0]; break; case clang::X86::BI__builtin_ia32_vfmaddph512_maskz: case clang::X86::BI__builtin_ia32_vfmaddps512_maskz: case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz: case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz: case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz: case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz: MaskFalseVal = Constant::getNullValue(Ops[0]->getType()); break; case clang::X86::BI__builtin_ia32_vfmsubph512_mask3: case clang::X86::BI__builtin_ia32_vfmaddph512_mask3: case clang::X86::BI__builtin_ia32_vfmsubps512_mask3: case clang::X86::BI__builtin_ia32_vfmaddps512_mask3: case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3: case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3: case clang::X86::BI__builtin_ia32_vfmsubaddph512_mask3: case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask3: case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3: case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3: case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3: case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3: MaskFalseVal = Ops[2]; break; } if (MaskFalseVal) return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal); return Res; } static Value *EmitScalarFMAExpr(CodeGenFunction &CGF, const CallExpr *E, MutableArrayRef Ops, Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0, bool NegAcc = false) { unsigned Rnd = 4; if (Ops.size() > 4) Rnd = cast(Ops[4])->getZExtValue(); if (NegAcc) Ops[2] = CGF.Builder.CreateFNeg(Ops[2]); Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0); Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0); Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0); Value *Res; if (Rnd != 4) { Intrinsic::ID IID; switch (Ops[0]->getType()->getPrimitiveSizeInBits()) { case 16: IID = Intrinsic::x86_avx512fp16_vfmadd_f16; break; case 32: IID = Intrinsic::x86_avx512_vfmadd_f32; break; case 64: IID = Intrinsic::x86_avx512_vfmadd_f64; break; default: llvm_unreachable("Unexpected size"); } Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2], Ops[4]}); } else if (CGF.Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *FMA = CGF.CGM.getIntrinsic( Intrinsic::experimental_constrained_fma, Ops[0]->getType()); Res = CGF.Builder.CreateConstrainedFPCall(FMA, Ops.slice(0, 3)); } else { Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType()); Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3)); } // If we have more than 3 arguments, we need to do 
masking. if (Ops.size() > 3) { Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType()) : Ops[PTIdx]; // If we negated the accumulator and the its the PassThru value we need to // bypass the negate. Conveniently Upper should be the same thing in this // case. if (NegAcc && PTIdx == 2) PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0); Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru); } return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0); } static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned, ArrayRef Ops) { llvm::Type *Ty = Ops[0]->getType(); // Arguments have a vXi32 type so cast to vXi64. Ty = llvm::FixedVectorType::get(CGF.Int64Ty, Ty->getPrimitiveSizeInBits() / 64); Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty); Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty); if (IsSigned) { // Shift left then arithmetic shift right. Constant *ShiftAmt = ConstantInt::get(Ty, 32); LHS = CGF.Builder.CreateShl(LHS, ShiftAmt); LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt); RHS = CGF.Builder.CreateShl(RHS, ShiftAmt); RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt); } else { // Clear the upper bits. Constant *Mask = ConstantInt::get(Ty, 0xffffffff); LHS = CGF.Builder.CreateAnd(LHS, Mask); RHS = CGF.Builder.CreateAnd(RHS, Mask); } return CGF.Builder.CreateMul(LHS, RHS); } // Emit a masked pternlog intrinsic. This only exists because the header has to // use a macro and we aren't able to pass the input argument to a pternlog // builtin and a select builtin without evaluating it twice. static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask, ArrayRef Ops) { llvm::Type *Ty = Ops[0]->getType(); unsigned VecWidth = Ty->getPrimitiveSizeInBits(); unsigned EltWidth = Ty->getScalarSizeInBits(); Intrinsic::ID IID; if (VecWidth == 128 && EltWidth == 32) IID = Intrinsic::x86_avx512_pternlog_d_128; else if (VecWidth == 256 && EltWidth == 32) IID = Intrinsic::x86_avx512_pternlog_d_256; else if (VecWidth == 512 && EltWidth == 32) IID = Intrinsic::x86_avx512_pternlog_d_512; else if (VecWidth == 128 && EltWidth == 64) IID = Intrinsic::x86_avx512_pternlog_q_128; else if (VecWidth == 256 && EltWidth == 64) IID = Intrinsic::x86_avx512_pternlog_q_256; else if (VecWidth == 512 && EltWidth == 64) IID = Intrinsic::x86_avx512_pternlog_q_512; else llvm_unreachable("Unexpected intrinsic"); Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID), Ops.drop_back()); Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0]; return EmitX86Select(CGF, Ops[4], Ternlog, PassThru); } static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy) { unsigned NumberOfElements = cast(DstTy)->getNumElements(); Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements); return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2"); } Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) { const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); StringRef CPUStr = cast(CPUExpr)->getString(); return EmitX86CpuIs(CPUStr); } // Convert F16 halfs to floats. static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF, ArrayRef Ops, llvm::Type *DstTy) { assert((Ops.size() == 1 || Ops.size() == 3 || Ops.size() == 4) && "Unknown cvtph2ps intrinsic"); // If the SAE intrinsic doesn't use default rounding then we can't upgrade. 
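// Sketch of the default-rounding upgrade performed further below
// (placeholder names): a 128-bit cvtph2ps is expected to become plain IR,
//   %lo = shufflevector <8 x i16> %src, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
//   %h  = bitcast <4 x i16> %lo to <4 x half>
//   %f  = fpext <4 x half> %h to <4 x float>
// with the masked variants finished off by a select against the pass-through.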
if (Ops.size() == 4 && cast(Ops[3])->getZExtValue() != 4) { Function *F = CGF.CGM.getIntrinsic(Intrinsic::x86_avx512_mask_vcvtph2ps_512); return CGF.Builder.CreateCall(F, {Ops[0], Ops[1], Ops[2], Ops[3]}); } unsigned NumDstElts = cast(DstTy)->getNumElements(); Value *Src = Ops[0]; // Extract the subvector. if (NumDstElts != cast(Src->getType())->getNumElements()) { assert(NumDstElts == 4 && "Unexpected vector size"); Src = CGF.Builder.CreateShuffleVector(Src, ArrayRef{0, 1, 2, 3}); } // Bitcast from vXi16 to vXf16. auto *HalfTy = llvm::FixedVectorType::get( llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts); Src = CGF.Builder.CreateBitCast(Src, HalfTy); // Perform the fp-extension. Value *Res = CGF.Builder.CreateFPExt(Src, DstTy, "cvtph2ps"); if (Ops.size() >= 3) Res = EmitX86Select(CGF, Ops[2], Res, Ops[1]); return Res; } Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { llvm::Type *Int32Ty = Builder.getInt32Ty(); // Matching the struct layout from the compiler-rt/libgcc structure that is // filled in: // unsigned int __cpu_vendor; // unsigned int __cpu_type; // unsigned int __cpu_subtype; // unsigned int __cpu_features[1]; llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1)); // Grab the global __cpu_model. llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); cast(CpuModel)->setDSOLocal(true); // Calculate the index needed to access the correct field based on the // range. Also adjust the expected value. unsigned Index; unsigned Value; std::tie(Index, Value) = StringSwitch>(CPUStr) #define X86_VENDOR(ENUM, STRING) \ .Case(STRING, {0u, static_cast(llvm::X86::ENUM)}) #define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \ .Case(ALIAS, {1u, static_cast(llvm::X86::ENUM)}) #define X86_CPU_TYPE(ENUM, STR) \ .Case(STR, {1u, static_cast(llvm::X86::ENUM)}) #define X86_CPU_SUBTYPE_ALIAS(ENUM, ALIAS) \ .Case(ALIAS, {2u, static_cast(llvm::X86::ENUM)}) #define X86_CPU_SUBTYPE(ENUM, STR) \ .Case(STR, {2u, static_cast(llvm::X86::ENUM)}) #include "llvm/TargetParser/X86TargetParser.def" .Default({0, 0}); assert(Value != 0 && "Invalid CPUStr passed to CpuIs"); // Grab the appropriate field from __cpu_model. llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, Index)}; llvm::Value *CpuValue = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs); CpuValue = Builder.CreateAlignedLoad(Int32Ty, CpuValue, CharUnits::fromQuantity(4)); // Check the value of the field against the requested value. return Builder.CreateICmpEQ(CpuValue, llvm::ConstantInt::get(Int32Ty, Value)); } Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) { const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); StringRef FeatureStr = cast(FeatureExpr)->getString(); if (!getContext().getTargetInfo().validateCpuSupports(FeatureStr)) return Builder.getFalse(); return EmitX86CpuSupports(FeatureStr); } Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef FeatureStrs) { return EmitX86CpuSupports(llvm::X86::getCpuSupportsMask(FeatureStrs)); } llvm::Value * CodeGenFunction::EmitX86CpuSupports(std::array FeatureMask) { Value *Result = Builder.getTrue(); if (FeatureMask[0] != 0) { // Matching the struct layout from the compiler-rt/libgcc structure that is // filled in: // unsigned int __cpu_vendor; // unsigned int __cpu_type; // unsigned int __cpu_subtype; // unsigned int __cpu_features[1]; llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1)); // Grab the global __cpu_model. 
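// Sketch of the resulting guard (placeholders; only the first 32-bit word is
// shown): a call such as __builtin_cpu_supports("sse4.2") is expected to
// reduce to roughly
//   %w   = load i32, ptr getelementptr({i32, i32, i32, [1 x i32]}, ptr @__cpu_model, i32 0, i32 3, i32 0)
//   %and = and i32 %w, <mask>
//   %ok  = icmp eq i32 %and, <mask>
// with later words read from @__cpu_features2 when the feature bit falls
// outside the first word.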
llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); cast(CpuModel)->setDSOLocal(true); // Grab the first (0th) element from the field __cpu_features off of the // global in the struct STy. Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3), Builder.getInt32(0)}; Value *CpuFeatures = Builder.CreateInBoundsGEP(STy, CpuModel, Idxs); Value *Features = Builder.CreateAlignedLoad(Int32Ty, CpuFeatures, CharUnits::fromQuantity(4)); // Check the value of the bit corresponding to the feature requested. Value *Mask = Builder.getInt32(FeatureMask[0]); Value *Bitset = Builder.CreateAnd(Features, Mask); Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); Result = Builder.CreateAnd(Result, Cmp); } llvm::Type *ATy = llvm::ArrayType::get(Int32Ty, 3); llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(ATy, "__cpu_features2"); cast(CpuFeatures2)->setDSOLocal(true); for (int i = 1; i != 4; ++i) { const uint32_t M = FeatureMask[i]; if (!M) continue; Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(i - 1)}; Value *Features = Builder.CreateAlignedLoad( Int32Ty, Builder.CreateInBoundsGEP(ATy, CpuFeatures2, Idxs), CharUnits::fromQuantity(4)); // Check the value of the bit corresponding to the feature requested. Value *Mask = Builder.getInt32(M); Value *Bitset = Builder.CreateAnd(Features, Mask); Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); Result = Builder.CreateAnd(Result, Cmp); } return Result; } Value *CodeGenFunction::EmitAArch64CpuInit() { llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, "__init_cpu_features_resolver"); cast(Func.getCallee())->setDSOLocal(true); cast(Func.getCallee()) ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); return Builder.CreateCall(Func); } Value *CodeGenFunction::EmitX86CpuInit() { llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, /*Variadic*/ false); llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init"); cast(Func.getCallee())->setDSOLocal(true); cast(Func.getCallee()) ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); return Builder.CreateCall(Func); } Value *CodeGenFunction::EmitAArch64CpuSupports(const CallExpr *E) { const Expr *ArgExpr = E->getArg(0)->IgnoreParenCasts(); StringRef ArgStr = cast(ArgExpr)->getString(); llvm::SmallVector Features; ArgStr.split(Features, "+"); for (auto &Feature : Features) { Feature = Feature.trim(); if (!llvm::AArch64::parseFMVExtension(Feature)) return Builder.getFalse(); if (Feature != "default") Features.push_back(Feature); } return EmitAArch64CpuSupports(Features); } llvm::Value * CodeGenFunction::EmitAArch64CpuSupports(ArrayRef FeaturesStrs) { uint64_t FeaturesMask = llvm::AArch64::getCpuSupportsMask(FeaturesStrs); Value *Result = Builder.getTrue(); if (FeaturesMask != 0) { // Get features from structure in runtime library // struct { // unsigned long long features; // } __aarch64_cpu_features; llvm::Type *STy = llvm::StructType::get(Int64Ty); llvm::Constant *AArch64CPUFeatures = CGM.CreateRuntimeVariable(STy, "__aarch64_cpu_features"); cast(AArch64CPUFeatures)->setDSOLocal(true); llvm::Value *CpuFeatures = Builder.CreateGEP( STy, AArch64CPUFeatures, {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, 0)}); Value *Features = Builder.CreateAlignedLoad(Int64Ty, CpuFeatures, CharUnits::fromQuantity(8)); Value *Mask = Builder.getInt64(FeaturesMask); Value *Bitset = Builder.CreateAnd(Features, Mask); Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask); Result = 
Builder.CreateAnd(Result, Cmp); } return Result; } Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (BuiltinID == Builtin::BI__builtin_cpu_is) return EmitX86CpuIs(E); if (BuiltinID == Builtin::BI__builtin_cpu_supports) return EmitX86CpuSupports(E); if (BuiltinID == Builtin::BI__builtin_cpu_init) return EmitX86CpuInit(); // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. if (std::optional MsvcIntId = translateX86ToMsvcIntrin(BuiltinID)) return EmitMSVCBuiltinExpr(*MsvcIntId, E); SmallVector Ops; bool IsMaskFCmp = false; bool IsConjFMA = false; // Find out if any arguments are required to be integer constant expressions. unsigned ICEArguments = 0; ASTContext::GetBuiltinTypeError Error; getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } // These exist so that the builtin that takes an immediate can be bounds // checked by clang to avoid passing bad immediates to the backend. Since // AVX has a larger immediate than SSE we would need separate builtins to // do the different bounds checking. Rather than create a clang specific // SSE only builtin, this implements eight separate builtins to match gcc // implementation. auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); llvm::Function *F = CGM.getIntrinsic(ID); return Builder.CreateCall(F, Ops); }; // For the vector forms of FP comparisons, translate the builtins directly to // IR. // TODO: The builtins could be removed if the SSE header files used vector // extension comparisons directly (vector ordered/unordered may need // additional support via __builtin_isnan()). 
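// Illustrative sketch of getVectorFCmpIR below (operand names are
// placeholders): for an ordered less-than on <4 x float> operands it is
// expected to produce
//   %cmp  = fcmp olt <4 x float> %a, %b
//   %sext = sext <4 x i1> %cmp to <4 x i32>
//   %res  = bitcast <4 x i32> %sext to <4 x float>
// i.e. an all-ones/all-zeros per-lane mask reinterpreted in the FP type
// (the signaling variants go through CreateFCmpS instead).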
auto getVectorFCmpIR = [this, &Ops, E](CmpInst::Predicate Pred, bool IsSignaling) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *Cmp; if (IsSignaling) Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]); else Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); llvm::VectorType *FPVecTy = cast(Ops[0]->getType()); llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); return Builder.CreateBitCast(Sext, FPVecTy); }; switch (BuiltinID) { default: return nullptr; case X86::BI_mm_prefetch: { Value *Address = Ops[0]; ConstantInt *C = cast(Ops[1]); Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1); Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3); Value *Data = ConstantInt::get(Int32Ty, 1); Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); return Builder.CreateCall(F, {Address, RW, Locality, Data}); } case X86::BI_mm_clflush: { return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush), Ops[0]); } case X86::BI_mm_lfence: { return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence)); } case X86::BI_mm_mfence: { return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence)); } case X86::BI_mm_sfence: { return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence)); } case X86::BI_mm_pause: { return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause)); } case X86::BI__rdtsc: { return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc)); } case X86::BI__builtin_ia32_rdtscp: { Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp)); Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), Ops[0]); return Builder.CreateExtractValue(Call, 0); } case X86::BI__builtin_ia32_lzcnt_u16: case X86::BI__builtin_ia32_lzcnt_u32: case X86::BI__builtin_ia32_lzcnt_u64: { Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); } case X86::BI__builtin_ia32_tzcnt_u16: case X86::BI__builtin_ia32_tzcnt_u32: case X86::BI__builtin_ia32_tzcnt_u64: { Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); } case X86::BI__builtin_ia32_undef128: case X86::BI__builtin_ia32_undef256: case X86::BI__builtin_ia32_undef512: // The x86 definition of "undef" is not the same as the LLVM definition // (PR32176). We leave optimizing away an unnecessary zero constant to the // IR optimizer and backend. // TODO: If we had a "freeze" IR instruction to generate a fixed undef // value, we should use that here instead of a zero. 
return llvm::Constant::getNullValue(ConvertType(E->getType())); case X86::BI__builtin_ia32_vec_init_v8qi: case X86::BI__builtin_ia32_vec_init_v4hi: case X86::BI__builtin_ia32_vec_init_v2si: return Builder.CreateBitCast(BuildVector(Ops), llvm::Type::getX86_MMXTy(getLLVMContext())); case X86::BI__builtin_ia32_vec_ext_v2si: case X86::BI__builtin_ia32_vec_ext_v16qi: case X86::BI__builtin_ia32_vec_ext_v8hi: case X86::BI__builtin_ia32_vec_ext_v4si: case X86::BI__builtin_ia32_vec_ext_v4sf: case X86::BI__builtin_ia32_vec_ext_v2di: case X86::BI__builtin_ia32_vec_ext_v32qi: case X86::BI__builtin_ia32_vec_ext_v16hi: case X86::BI__builtin_ia32_vec_ext_v8si: case X86::BI__builtin_ia32_vec_ext_v4di: { unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); uint64_t Index = cast(Ops[1])->getZExtValue(); Index &= NumElts - 1; // These builtins exist so we can ensure the index is an ICE and in range. // Otherwise we could just do this in the header file. return Builder.CreateExtractElement(Ops[0], Index); } case X86::BI__builtin_ia32_vec_set_v16qi: case X86::BI__builtin_ia32_vec_set_v8hi: case X86::BI__builtin_ia32_vec_set_v4si: case X86::BI__builtin_ia32_vec_set_v2di: case X86::BI__builtin_ia32_vec_set_v32qi: case X86::BI__builtin_ia32_vec_set_v16hi: case X86::BI__builtin_ia32_vec_set_v8si: case X86::BI__builtin_ia32_vec_set_v4di: { unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); unsigned Index = cast(Ops[2])->getZExtValue(); Index &= NumElts - 1; // These builtins exist so we can ensure the index is an ICE and in range. // Otherwise we could just do this in the header file. return Builder.CreateInsertElement(Ops[0], Ops[1], Index); } case X86::BI_mm_setcsr: case X86::BI__builtin_ia32_ldmxcsr: { RawAddress Tmp = CreateMemTemp(E->getArg(0)->getType()); Builder.CreateStore(Ops[0], Tmp); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), Tmp.getPointer()); } case X86::BI_mm_getcsr: case X86::BI__builtin_ia32_stmxcsr: { RawAddress Tmp = CreateMemTemp(E->getType()); Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), Tmp.getPointer()); return Builder.CreateLoad(Tmp, "stmxcsr"); } case X86::BI__builtin_ia32_xsave: case X86::BI__builtin_ia32_xsave64: case X86::BI__builtin_ia32_xrstor: case X86::BI__builtin_ia32_xrstor64: case X86::BI__builtin_ia32_xsaveopt: case X86::BI__builtin_ia32_xsaveopt64: case X86::BI__builtin_ia32_xrstors: case X86::BI__builtin_ia32_xrstors64: case X86::BI__builtin_ia32_xsavec: case X86::BI__builtin_ia32_xsavec64: case X86::BI__builtin_ia32_xsaves: case X86::BI__builtin_ia32_xsaves64: case X86::BI__builtin_ia32_xsetbv: case X86::BI_xsetbv: { Intrinsic::ID ID; #define INTRINSIC_X86_XSAVE_ID(NAME) \ case X86::BI__builtin_ia32_##NAME: \ ID = Intrinsic::x86_##NAME; \ break switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); INTRINSIC_X86_XSAVE_ID(xsave); INTRINSIC_X86_XSAVE_ID(xsave64); INTRINSIC_X86_XSAVE_ID(xrstor); INTRINSIC_X86_XSAVE_ID(xrstor64); INTRINSIC_X86_XSAVE_ID(xsaveopt); INTRINSIC_X86_XSAVE_ID(xsaveopt64); INTRINSIC_X86_XSAVE_ID(xrstors); INTRINSIC_X86_XSAVE_ID(xrstors64); INTRINSIC_X86_XSAVE_ID(xsavec); INTRINSIC_X86_XSAVE_ID(xsavec64); INTRINSIC_X86_XSAVE_ID(xsaves); INTRINSIC_X86_XSAVE_ID(xsaves64); INTRINSIC_X86_XSAVE_ID(xsetbv); case X86::BI_xsetbv: ID = Intrinsic::x86_xsetbv; break; } #undef INTRINSIC_X86_XSAVE_ID Value *Mhi = Builder.CreateTrunc( Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty); Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); Ops[1] = Mhi; 
Ops.push_back(Mlo); return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); } case X86::BI__builtin_ia32_xgetbv: case X86::BI_xgetbv: return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops); case X86::BI__builtin_ia32_storedqudi128_mask: case X86::BI__builtin_ia32_storedqusi128_mask: case X86::BI__builtin_ia32_storedquhi128_mask: case X86::BI__builtin_ia32_storedquqi128_mask: case X86::BI__builtin_ia32_storeupd128_mask: case X86::BI__builtin_ia32_storeups128_mask: case X86::BI__builtin_ia32_storedqudi256_mask: case X86::BI__builtin_ia32_storedqusi256_mask: case X86::BI__builtin_ia32_storedquhi256_mask: case X86::BI__builtin_ia32_storedquqi256_mask: case X86::BI__builtin_ia32_storeupd256_mask: case X86::BI__builtin_ia32_storeups256_mask: case X86::BI__builtin_ia32_storedqudi512_mask: case X86::BI__builtin_ia32_storedqusi512_mask: case X86::BI__builtin_ia32_storedquhi512_mask: case X86::BI__builtin_ia32_storedquqi512_mask: case X86::BI__builtin_ia32_storeupd512_mask: case X86::BI__builtin_ia32_storeups512_mask: return EmitX86MaskedStore(*this, Ops, Align(1)); case X86::BI__builtin_ia32_storesh128_mask: case X86::BI__builtin_ia32_storess128_mask: case X86::BI__builtin_ia32_storesd128_mask: return EmitX86MaskedStore(*this, Ops, Align(1)); case X86::BI__builtin_ia32_vpopcntb_128: case X86::BI__builtin_ia32_vpopcntd_128: case X86::BI__builtin_ia32_vpopcntq_128: case X86::BI__builtin_ia32_vpopcntw_128: case X86::BI__builtin_ia32_vpopcntb_256: case X86::BI__builtin_ia32_vpopcntd_256: case X86::BI__builtin_ia32_vpopcntq_256: case X86::BI__builtin_ia32_vpopcntw_256: case X86::BI__builtin_ia32_vpopcntb_512: case X86::BI__builtin_ia32_vpopcntd_512: case X86::BI__builtin_ia32_vpopcntq_512: case X86::BI__builtin_ia32_vpopcntw_512: { llvm::Type *ResultType = ConvertType(E->getType()); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); return Builder.CreateCall(F, Ops); } case X86::BI__builtin_ia32_cvtmask2b128: case X86::BI__builtin_ia32_cvtmask2b256: case X86::BI__builtin_ia32_cvtmask2b512: case X86::BI__builtin_ia32_cvtmask2w128: case X86::BI__builtin_ia32_cvtmask2w256: case X86::BI__builtin_ia32_cvtmask2w512: case X86::BI__builtin_ia32_cvtmask2d128: case X86::BI__builtin_ia32_cvtmask2d256: case X86::BI__builtin_ia32_cvtmask2d512: case X86::BI__builtin_ia32_cvtmask2q128: case X86::BI__builtin_ia32_cvtmask2q256: case X86::BI__builtin_ia32_cvtmask2q512: return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType())); case X86::BI__builtin_ia32_cvtb2mask128: case X86::BI__builtin_ia32_cvtb2mask256: case X86::BI__builtin_ia32_cvtb2mask512: case X86::BI__builtin_ia32_cvtw2mask128: case X86::BI__builtin_ia32_cvtw2mask256: case X86::BI__builtin_ia32_cvtw2mask512: case X86::BI__builtin_ia32_cvtd2mask128: case X86::BI__builtin_ia32_cvtd2mask256: case X86::BI__builtin_ia32_cvtd2mask512: case X86::BI__builtin_ia32_cvtq2mask128: case X86::BI__builtin_ia32_cvtq2mask256: case X86::BI__builtin_ia32_cvtq2mask512: return EmitX86ConvertToMask(*this, Ops[0]); case X86::BI__builtin_ia32_cvtdq2ps512_mask: case X86::BI__builtin_ia32_cvtqq2ps512_mask: case X86::BI__builtin_ia32_cvtqq2pd512_mask: case X86::BI__builtin_ia32_vcvtw2ph512_mask: case X86::BI__builtin_ia32_vcvtdq2ph512_mask: case X86::BI__builtin_ia32_vcvtqq2ph512_mask: return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true); case X86::BI__builtin_ia32_cvtudq2ps512_mask: case X86::BI__builtin_ia32_cvtuqq2ps512_mask: case X86::BI__builtin_ia32_cvtuqq2pd512_mask: case X86::BI__builtin_ia32_vcvtuw2ph512_mask: case 
X86::BI__builtin_ia32_vcvtudq2ph512_mask: case X86::BI__builtin_ia32_vcvtuqq2ph512_mask: return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false); case X86::BI__builtin_ia32_vfmaddss3: case X86::BI__builtin_ia32_vfmaddsd3: case X86::BI__builtin_ia32_vfmaddsh3_mask: case X86::BI__builtin_ia32_vfmaddss3_mask: case X86::BI__builtin_ia32_vfmaddsd3_mask: return EmitScalarFMAExpr(*this, E, Ops, Ops[0]); case X86::BI__builtin_ia32_vfmaddss: case X86::BI__builtin_ia32_vfmaddsd: return EmitScalarFMAExpr(*this, E, Ops, Constant::getNullValue(Ops[0]->getType())); case X86::BI__builtin_ia32_vfmaddsh3_maskz: case X86::BI__builtin_ia32_vfmaddss3_maskz: case X86::BI__builtin_ia32_vfmaddsd3_maskz: return EmitScalarFMAExpr(*this, E, Ops, Ops[0], /*ZeroMask*/ true); case X86::BI__builtin_ia32_vfmaddsh3_mask3: case X86::BI__builtin_ia32_vfmaddss3_mask3: case X86::BI__builtin_ia32_vfmaddsd3_mask3: return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2); case X86::BI__builtin_ia32_vfmsubsh3_mask3: case X86::BI__builtin_ia32_vfmsubss3_mask3: case X86::BI__builtin_ia32_vfmsubsd3_mask3: return EmitScalarFMAExpr(*this, E, Ops, Ops[2], /*ZeroMask*/ false, 2, /*NegAcc*/ true); case X86::BI__builtin_ia32_vfmaddph: case X86::BI__builtin_ia32_vfmaddps: case X86::BI__builtin_ia32_vfmaddpd: case X86::BI__builtin_ia32_vfmaddph256: case X86::BI__builtin_ia32_vfmaddps256: case X86::BI__builtin_ia32_vfmaddpd256: case X86::BI__builtin_ia32_vfmaddph512_mask: case X86::BI__builtin_ia32_vfmaddph512_maskz: case X86::BI__builtin_ia32_vfmaddph512_mask3: case X86::BI__builtin_ia32_vfmaddps512_mask: case X86::BI__builtin_ia32_vfmaddps512_maskz: case X86::BI__builtin_ia32_vfmaddps512_mask3: case X86::BI__builtin_ia32_vfmsubps512_mask3: case X86::BI__builtin_ia32_vfmaddpd512_mask: case X86::BI__builtin_ia32_vfmaddpd512_maskz: case X86::BI__builtin_ia32_vfmaddpd512_mask3: case X86::BI__builtin_ia32_vfmsubpd512_mask3: case X86::BI__builtin_ia32_vfmsubph512_mask3: return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false); case X86::BI__builtin_ia32_vfmaddsubph512_mask: case X86::BI__builtin_ia32_vfmaddsubph512_maskz: case X86::BI__builtin_ia32_vfmaddsubph512_mask3: case X86::BI__builtin_ia32_vfmsubaddph512_mask3: case X86::BI__builtin_ia32_vfmaddsubps512_mask: case X86::BI__builtin_ia32_vfmaddsubps512_maskz: case X86::BI__builtin_ia32_vfmaddsubps512_mask3: case X86::BI__builtin_ia32_vfmsubaddps512_mask3: case X86::BI__builtin_ia32_vfmaddsubpd512_mask: case X86::BI__builtin_ia32_vfmaddsubpd512_maskz: case X86::BI__builtin_ia32_vfmaddsubpd512_mask3: case X86::BI__builtin_ia32_vfmsubaddpd512_mask3: return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ true); case X86::BI__builtin_ia32_movdqa32store128_mask: case X86::BI__builtin_ia32_movdqa64store128_mask: case X86::BI__builtin_ia32_storeaps128_mask: case X86::BI__builtin_ia32_storeapd128_mask: case X86::BI__builtin_ia32_movdqa32store256_mask: case X86::BI__builtin_ia32_movdqa64store256_mask: case X86::BI__builtin_ia32_storeaps256_mask: case X86::BI__builtin_ia32_storeapd256_mask: case X86::BI__builtin_ia32_movdqa32store512_mask: case X86::BI__builtin_ia32_movdqa64store512_mask: case X86::BI__builtin_ia32_storeaps512_mask: case X86::BI__builtin_ia32_storeapd512_mask: return EmitX86MaskedStore( *this, Ops, getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign()); case X86::BI__builtin_ia32_loadups128_mask: case X86::BI__builtin_ia32_loadups256_mask: case X86::BI__builtin_ia32_loadups512_mask: case X86::BI__builtin_ia32_loadupd128_mask: 
case X86::BI__builtin_ia32_loadupd256_mask: case X86::BI__builtin_ia32_loadupd512_mask: case X86::BI__builtin_ia32_loaddquqi128_mask: case X86::BI__builtin_ia32_loaddquqi256_mask: case X86::BI__builtin_ia32_loaddquqi512_mask: case X86::BI__builtin_ia32_loaddquhi128_mask: case X86::BI__builtin_ia32_loaddquhi256_mask: case X86::BI__builtin_ia32_loaddquhi512_mask: case X86::BI__builtin_ia32_loaddqusi128_mask: case X86::BI__builtin_ia32_loaddqusi256_mask: case X86::BI__builtin_ia32_loaddqusi512_mask: case X86::BI__builtin_ia32_loaddqudi128_mask: case X86::BI__builtin_ia32_loaddqudi256_mask: case X86::BI__builtin_ia32_loaddqudi512_mask: return EmitX86MaskedLoad(*this, Ops, Align(1)); case X86::BI__builtin_ia32_loadsh128_mask: case X86::BI__builtin_ia32_loadss128_mask: case X86::BI__builtin_ia32_loadsd128_mask: return EmitX86MaskedLoad(*this, Ops, Align(1)); case X86::BI__builtin_ia32_loadaps128_mask: case X86::BI__builtin_ia32_loadaps256_mask: case X86::BI__builtin_ia32_loadaps512_mask: case X86::BI__builtin_ia32_loadapd128_mask: case X86::BI__builtin_ia32_loadapd256_mask: case X86::BI__builtin_ia32_loadapd512_mask: case X86::BI__builtin_ia32_movdqa32load128_mask: case X86::BI__builtin_ia32_movdqa32load256_mask: case X86::BI__builtin_ia32_movdqa32load512_mask: case X86::BI__builtin_ia32_movdqa64load128_mask: case X86::BI__builtin_ia32_movdqa64load256_mask: case X86::BI__builtin_ia32_movdqa64load512_mask: return EmitX86MaskedLoad( *this, Ops, getContext().getTypeAlignInChars(E->getArg(1)->getType()).getAsAlign()); case X86::BI__builtin_ia32_expandloaddf128_mask: case X86::BI__builtin_ia32_expandloaddf256_mask: case X86::BI__builtin_ia32_expandloaddf512_mask: case X86::BI__builtin_ia32_expandloadsf128_mask: case X86::BI__builtin_ia32_expandloadsf256_mask: case X86::BI__builtin_ia32_expandloadsf512_mask: case X86::BI__builtin_ia32_expandloaddi128_mask: case X86::BI__builtin_ia32_expandloaddi256_mask: case X86::BI__builtin_ia32_expandloaddi512_mask: case X86::BI__builtin_ia32_expandloadsi128_mask: case X86::BI__builtin_ia32_expandloadsi256_mask: case X86::BI__builtin_ia32_expandloadsi512_mask: case X86::BI__builtin_ia32_expandloadhi128_mask: case X86::BI__builtin_ia32_expandloadhi256_mask: case X86::BI__builtin_ia32_expandloadhi512_mask: case X86::BI__builtin_ia32_expandloadqi128_mask: case X86::BI__builtin_ia32_expandloadqi256_mask: case X86::BI__builtin_ia32_expandloadqi512_mask: return EmitX86ExpandLoad(*this, Ops); case X86::BI__builtin_ia32_compressstoredf128_mask: case X86::BI__builtin_ia32_compressstoredf256_mask: case X86::BI__builtin_ia32_compressstoredf512_mask: case X86::BI__builtin_ia32_compressstoresf128_mask: case X86::BI__builtin_ia32_compressstoresf256_mask: case X86::BI__builtin_ia32_compressstoresf512_mask: case X86::BI__builtin_ia32_compressstoredi128_mask: case X86::BI__builtin_ia32_compressstoredi256_mask: case X86::BI__builtin_ia32_compressstoredi512_mask: case X86::BI__builtin_ia32_compressstoresi128_mask: case X86::BI__builtin_ia32_compressstoresi256_mask: case X86::BI__builtin_ia32_compressstoresi512_mask: case X86::BI__builtin_ia32_compressstorehi128_mask: case X86::BI__builtin_ia32_compressstorehi256_mask: case X86::BI__builtin_ia32_compressstorehi512_mask: case X86::BI__builtin_ia32_compressstoreqi128_mask: case X86::BI__builtin_ia32_compressstoreqi256_mask: case X86::BI__builtin_ia32_compressstoreqi512_mask: return EmitX86CompressStore(*this, Ops); case X86::BI__builtin_ia32_expanddf128_mask: case X86::BI__builtin_ia32_expanddf256_mask: case 
X86::BI__builtin_ia32_expanddf512_mask: case X86::BI__builtin_ia32_expandsf128_mask: case X86::BI__builtin_ia32_expandsf256_mask: case X86::BI__builtin_ia32_expandsf512_mask: case X86::BI__builtin_ia32_expanddi128_mask: case X86::BI__builtin_ia32_expanddi256_mask: case X86::BI__builtin_ia32_expanddi512_mask: case X86::BI__builtin_ia32_expandsi128_mask: case X86::BI__builtin_ia32_expandsi256_mask: case X86::BI__builtin_ia32_expandsi512_mask: case X86::BI__builtin_ia32_expandhi128_mask: case X86::BI__builtin_ia32_expandhi256_mask: case X86::BI__builtin_ia32_expandhi512_mask: case X86::BI__builtin_ia32_expandqi128_mask: case X86::BI__builtin_ia32_expandqi256_mask: case X86::BI__builtin_ia32_expandqi512_mask: return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false); case X86::BI__builtin_ia32_compressdf128_mask: case X86::BI__builtin_ia32_compressdf256_mask: case X86::BI__builtin_ia32_compressdf512_mask: case X86::BI__builtin_ia32_compresssf128_mask: case X86::BI__builtin_ia32_compresssf256_mask: case X86::BI__builtin_ia32_compresssf512_mask: case X86::BI__builtin_ia32_compressdi128_mask: case X86::BI__builtin_ia32_compressdi256_mask: case X86::BI__builtin_ia32_compressdi512_mask: case X86::BI__builtin_ia32_compresssi128_mask: case X86::BI__builtin_ia32_compresssi256_mask: case X86::BI__builtin_ia32_compresssi512_mask: case X86::BI__builtin_ia32_compresshi128_mask: case X86::BI__builtin_ia32_compresshi256_mask: case X86::BI__builtin_ia32_compresshi512_mask: case X86::BI__builtin_ia32_compressqi128_mask: case X86::BI__builtin_ia32_compressqi256_mask: case X86::BI__builtin_ia32_compressqi512_mask: return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true); case X86::BI__builtin_ia32_gather3div2df: case X86::BI__builtin_ia32_gather3div2di: case X86::BI__builtin_ia32_gather3div4df: case X86::BI__builtin_ia32_gather3div4di: case X86::BI__builtin_ia32_gather3div4sf: case X86::BI__builtin_ia32_gather3div4si: case X86::BI__builtin_ia32_gather3div8sf: case X86::BI__builtin_ia32_gather3div8si: case X86::BI__builtin_ia32_gather3siv2df: case X86::BI__builtin_ia32_gather3siv2di: case X86::BI__builtin_ia32_gather3siv4df: case X86::BI__builtin_ia32_gather3siv4di: case X86::BI__builtin_ia32_gather3siv4sf: case X86::BI__builtin_ia32_gather3siv4si: case X86::BI__builtin_ia32_gather3siv8sf: case X86::BI__builtin_ia32_gather3siv8si: case X86::BI__builtin_ia32_gathersiv8df: case X86::BI__builtin_ia32_gathersiv16sf: case X86::BI__builtin_ia32_gatherdiv8df: case X86::BI__builtin_ia32_gatherdiv16sf: case X86::BI__builtin_ia32_gathersiv8di: case X86::BI__builtin_ia32_gathersiv16si: case X86::BI__builtin_ia32_gatherdiv8di: case X86::BI__builtin_ia32_gatherdiv16si: { Intrinsic::ID IID; switch (BuiltinID) { default: llvm_unreachable("Unexpected builtin"); case X86::BI__builtin_ia32_gather3div2df: IID = Intrinsic::x86_avx512_mask_gather3div2_df; break; case X86::BI__builtin_ia32_gather3div2di: IID = Intrinsic::x86_avx512_mask_gather3div2_di; break; case X86::BI__builtin_ia32_gather3div4df: IID = Intrinsic::x86_avx512_mask_gather3div4_df; break; case X86::BI__builtin_ia32_gather3div4di: IID = Intrinsic::x86_avx512_mask_gather3div4_di; break; case X86::BI__builtin_ia32_gather3div4sf: IID = Intrinsic::x86_avx512_mask_gather3div4_sf; break; case X86::BI__builtin_ia32_gather3div4si: IID = Intrinsic::x86_avx512_mask_gather3div4_si; break; case X86::BI__builtin_ia32_gather3div8sf: IID = Intrinsic::x86_avx512_mask_gather3div8_sf; break; case X86::BI__builtin_ia32_gather3div8si: IID = 
Intrinsic::x86_avx512_mask_gather3div8_si; break; case X86::BI__builtin_ia32_gather3siv2df: IID = Intrinsic::x86_avx512_mask_gather3siv2_df; break; case X86::BI__builtin_ia32_gather3siv2di: IID = Intrinsic::x86_avx512_mask_gather3siv2_di; break; case X86::BI__builtin_ia32_gather3siv4df: IID = Intrinsic::x86_avx512_mask_gather3siv4_df; break; case X86::BI__builtin_ia32_gather3siv4di: IID = Intrinsic::x86_avx512_mask_gather3siv4_di; break; case X86::BI__builtin_ia32_gather3siv4sf: IID = Intrinsic::x86_avx512_mask_gather3siv4_sf; break; case X86::BI__builtin_ia32_gather3siv4si: IID = Intrinsic::x86_avx512_mask_gather3siv4_si; break; case X86::BI__builtin_ia32_gather3siv8sf: IID = Intrinsic::x86_avx512_mask_gather3siv8_sf; break; case X86::BI__builtin_ia32_gather3siv8si: IID = Intrinsic::x86_avx512_mask_gather3siv8_si; break; case X86::BI__builtin_ia32_gathersiv8df: IID = Intrinsic::x86_avx512_mask_gather_dpd_512; break; case X86::BI__builtin_ia32_gathersiv16sf: IID = Intrinsic::x86_avx512_mask_gather_dps_512; break; case X86::BI__builtin_ia32_gatherdiv8df: IID = Intrinsic::x86_avx512_mask_gather_qpd_512; break; case X86::BI__builtin_ia32_gatherdiv16sf: IID = Intrinsic::x86_avx512_mask_gather_qps_512; break; case X86::BI__builtin_ia32_gathersiv8di: IID = Intrinsic::x86_avx512_mask_gather_dpq_512; break; case X86::BI__builtin_ia32_gathersiv16si: IID = Intrinsic::x86_avx512_mask_gather_dpi_512; break; case X86::BI__builtin_ia32_gatherdiv8di: IID = Intrinsic::x86_avx512_mask_gather_qpq_512; break; case X86::BI__builtin_ia32_gatherdiv16si: IID = Intrinsic::x86_avx512_mask_gather_qpi_512; break; } unsigned MinElts = std::min( cast(Ops[0]->getType())->getNumElements(), cast(Ops[2]->getType())->getNumElements()); Ops[3] = getMaskVecValue(*this, Ops[3], MinElts); Function *Intr = CGM.getIntrinsic(IID); return Builder.CreateCall(Intr, Ops); } case X86::BI__builtin_ia32_scattersiv8df: case X86::BI__builtin_ia32_scattersiv16sf: case X86::BI__builtin_ia32_scatterdiv8df: case X86::BI__builtin_ia32_scatterdiv16sf: case X86::BI__builtin_ia32_scattersiv8di: case X86::BI__builtin_ia32_scattersiv16si: case X86::BI__builtin_ia32_scatterdiv8di: case X86::BI__builtin_ia32_scatterdiv16si: case X86::BI__builtin_ia32_scatterdiv2df: case X86::BI__builtin_ia32_scatterdiv2di: case X86::BI__builtin_ia32_scatterdiv4df: case X86::BI__builtin_ia32_scatterdiv4di: case X86::BI__builtin_ia32_scatterdiv4sf: case X86::BI__builtin_ia32_scatterdiv4si: case X86::BI__builtin_ia32_scatterdiv8sf: case X86::BI__builtin_ia32_scatterdiv8si: case X86::BI__builtin_ia32_scattersiv2df: case X86::BI__builtin_ia32_scattersiv2di: case X86::BI__builtin_ia32_scattersiv4df: case X86::BI__builtin_ia32_scattersiv4di: case X86::BI__builtin_ia32_scattersiv4sf: case X86::BI__builtin_ia32_scattersiv4si: case X86::BI__builtin_ia32_scattersiv8sf: case X86::BI__builtin_ia32_scattersiv8si: { Intrinsic::ID IID; switch (BuiltinID) { default: llvm_unreachable("Unexpected builtin"); case X86::BI__builtin_ia32_scattersiv8df: IID = Intrinsic::x86_avx512_mask_scatter_dpd_512; break; case X86::BI__builtin_ia32_scattersiv16sf: IID = Intrinsic::x86_avx512_mask_scatter_dps_512; break; case X86::BI__builtin_ia32_scatterdiv8df: IID = Intrinsic::x86_avx512_mask_scatter_qpd_512; break; case X86::BI__builtin_ia32_scatterdiv16sf: IID = Intrinsic::x86_avx512_mask_scatter_qps_512; break; case X86::BI__builtin_ia32_scattersiv8di: IID = Intrinsic::x86_avx512_mask_scatter_dpq_512; break; case X86::BI__builtin_ia32_scattersiv16si: IID = Intrinsic::x86_avx512_mask_scatter_dpi_512; 
break; case X86::BI__builtin_ia32_scatterdiv8di: IID = Intrinsic::x86_avx512_mask_scatter_qpq_512; break; case X86::BI__builtin_ia32_scatterdiv16si: IID = Intrinsic::x86_avx512_mask_scatter_qpi_512; break; case X86::BI__builtin_ia32_scatterdiv2df: IID = Intrinsic::x86_avx512_mask_scatterdiv2_df; break; case X86::BI__builtin_ia32_scatterdiv2di: IID = Intrinsic::x86_avx512_mask_scatterdiv2_di; break; case X86::BI__builtin_ia32_scatterdiv4df: IID = Intrinsic::x86_avx512_mask_scatterdiv4_df; break; case X86::BI__builtin_ia32_scatterdiv4di: IID = Intrinsic::x86_avx512_mask_scatterdiv4_di; break; case X86::BI__builtin_ia32_scatterdiv4sf: IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf; break; case X86::BI__builtin_ia32_scatterdiv4si: IID = Intrinsic::x86_avx512_mask_scatterdiv4_si; break; case X86::BI__builtin_ia32_scatterdiv8sf: IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf; break; case X86::BI__builtin_ia32_scatterdiv8si: IID = Intrinsic::x86_avx512_mask_scatterdiv8_si; break; case X86::BI__builtin_ia32_scattersiv2df: IID = Intrinsic::x86_avx512_mask_scattersiv2_df; break; case X86::BI__builtin_ia32_scattersiv2di: IID = Intrinsic::x86_avx512_mask_scattersiv2_di; break; case X86::BI__builtin_ia32_scattersiv4df: IID = Intrinsic::x86_avx512_mask_scattersiv4_df; break; case X86::BI__builtin_ia32_scattersiv4di: IID = Intrinsic::x86_avx512_mask_scattersiv4_di; break; case X86::BI__builtin_ia32_scattersiv4sf: IID = Intrinsic::x86_avx512_mask_scattersiv4_sf; break; case X86::BI__builtin_ia32_scattersiv4si: IID = Intrinsic::x86_avx512_mask_scattersiv4_si; break; case X86::BI__builtin_ia32_scattersiv8sf: IID = Intrinsic::x86_avx512_mask_scattersiv8_sf; break; case X86::BI__builtin_ia32_scattersiv8si: IID = Intrinsic::x86_avx512_mask_scattersiv8_si; break; } unsigned MinElts = std::min( cast(Ops[2]->getType())->getNumElements(), cast(Ops[3]->getType())->getNumElements()); Ops[1] = getMaskVecValue(*this, Ops[1], MinElts); Function *Intr = CGM.getIntrinsic(IID); return Builder.CreateCall(Intr, Ops); } case X86::BI__builtin_ia32_vextractf128_pd256: case X86::BI__builtin_ia32_vextractf128_ps256: case X86::BI__builtin_ia32_vextractf128_si256: case X86::BI__builtin_ia32_extract128i256: case X86::BI__builtin_ia32_extractf64x4_mask: case X86::BI__builtin_ia32_extractf32x4_mask: case X86::BI__builtin_ia32_extracti64x4_mask: case X86::BI__builtin_ia32_extracti32x4_mask: case X86::BI__builtin_ia32_extractf32x8_mask: case X86::BI__builtin_ia32_extracti32x8_mask: case X86::BI__builtin_ia32_extractf32x4_256_mask: case X86::BI__builtin_ia32_extracti32x4_256_mask: case X86::BI__builtin_ia32_extractf64x2_256_mask: case X86::BI__builtin_ia32_extracti64x2_256_mask: case X86::BI__builtin_ia32_extractf64x2_512_mask: case X86::BI__builtin_ia32_extracti64x2_512_mask: { auto *DstTy = cast(ConvertType(E->getType())); unsigned NumElts = DstTy->getNumElements(); unsigned SrcNumElts = cast(Ops[0]->getType())->getNumElements(); unsigned SubVectors = SrcNumElts / NumElts; unsigned Index = cast(Ops[1])->getZExtValue(); assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); Index &= SubVectors - 1; // Remove any extra bits. 
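    // The extract is then lowered as a shufflevector that picks NumElts
    // consecutive elements starting at the scaled index; the masked variants
    // blend the result with the passthru operand via EmitX86Select below.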
Index *= NumElts; int Indices[16]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i + Index; Value *Res = Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), "extract"); if (Ops.size() == 4) Res = EmitX86Select(*this, Ops[3], Res, Ops[2]); return Res; } case X86::BI__builtin_ia32_vinsertf128_pd256: case X86::BI__builtin_ia32_vinsertf128_ps256: case X86::BI__builtin_ia32_vinsertf128_si256: case X86::BI__builtin_ia32_insert128i256: case X86::BI__builtin_ia32_insertf64x4: case X86::BI__builtin_ia32_insertf32x4: case X86::BI__builtin_ia32_inserti64x4: case X86::BI__builtin_ia32_inserti32x4: case X86::BI__builtin_ia32_insertf32x8: case X86::BI__builtin_ia32_inserti32x8: case X86::BI__builtin_ia32_insertf32x4_256: case X86::BI__builtin_ia32_inserti32x4_256: case X86::BI__builtin_ia32_insertf64x2_256: case X86::BI__builtin_ia32_inserti64x2_256: case X86::BI__builtin_ia32_insertf64x2_512: case X86::BI__builtin_ia32_inserti64x2_512: { unsigned DstNumElts = cast(Ops[0]->getType())->getNumElements(); unsigned SrcNumElts = cast(Ops[1]->getType())->getNumElements(); unsigned SubVectors = DstNumElts / SrcNumElts; unsigned Index = cast(Ops[2])->getZExtValue(); assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); Index &= SubVectors - 1; // Remove any extra bits. Index *= SrcNumElts; int Indices[16]; for (unsigned i = 0; i != DstNumElts; ++i) Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i; Value *Op1 = Builder.CreateShuffleVector( Ops[1], ArrayRef(Indices, DstNumElts), "widen"); for (unsigned i = 0; i != DstNumElts; ++i) { if (i >= Index && i < (Index + SrcNumElts)) Indices[i] = (i - Index) + DstNumElts; else Indices[i] = i; } return Builder.CreateShuffleVector(Ops[0], Op1, ArrayRef(Indices, DstNumElts), "insert"); } case X86::BI__builtin_ia32_pmovqd512_mask: case X86::BI__builtin_ia32_pmovwb512_mask: { Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType()); return EmitX86Select(*this, Ops[2], Res, Ops[1]); } case X86::BI__builtin_ia32_pmovdb512_mask: case X86::BI__builtin_ia32_pmovdw512_mask: case X86::BI__builtin_ia32_pmovqw512_mask: { if (const auto *C = dyn_cast(Ops[2])) if (C->isAllOnesValue()) return Builder.CreateTrunc(Ops[0], Ops[1]->getType()); Intrinsic::ID IID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_pmovdb512_mask: IID = Intrinsic::x86_avx512_mask_pmov_db_512; break; case X86::BI__builtin_ia32_pmovdw512_mask: IID = Intrinsic::x86_avx512_mask_pmov_dw_512; break; case X86::BI__builtin_ia32_pmovqw512_mask: IID = Intrinsic::x86_avx512_mask_pmov_qw_512; break; } Function *Intr = CGM.getIntrinsic(IID); return Builder.CreateCall(Intr, Ops); } case X86::BI__builtin_ia32_pblendw128: case X86::BI__builtin_ia32_blendpd: case X86::BI__builtin_ia32_blendps: case X86::BI__builtin_ia32_blendpd256: case X86::BI__builtin_ia32_blendps256: case X86::BI__builtin_ia32_pblendw256: case X86::BI__builtin_ia32_pblendd128: case X86::BI__builtin_ia32_pblendd256: { unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); unsigned Imm = cast(Ops[2])->getZExtValue(); int Indices[16]; // If there are more than 8 elements, the immediate is used twice so make // sure we handle that. for (unsigned i = 0; i != NumElts; ++i) Indices[i] = ((Imm >> (i % 8)) & 0x1) ? 
NumElts + i : i; return Builder.CreateShuffleVector(Ops[0], Ops[1], ArrayRef(Indices, NumElts), "blend"); } case X86::BI__builtin_ia32_pshuflw: case X86::BI__builtin_ia32_pshuflw256: case X86::BI__builtin_ia32_pshuflw512: { uint32_t Imm = cast(Ops[1])->getZExtValue(); auto *Ty = cast(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); // Splat the 8-bits of immediate 4 times to help the loop wrap around. Imm = (Imm & 0xff) * 0x01010101; int Indices[32]; for (unsigned l = 0; l != NumElts; l += 8) { for (unsigned i = 0; i != 4; ++i) { Indices[l + i] = l + (Imm & 3); Imm >>= 2; } for (unsigned i = 4; i != 8; ++i) Indices[l + i] = l + i; } return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), "pshuflw"); } case X86::BI__builtin_ia32_pshufhw: case X86::BI__builtin_ia32_pshufhw256: case X86::BI__builtin_ia32_pshufhw512: { uint32_t Imm = cast(Ops[1])->getZExtValue(); auto *Ty = cast(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); // Splat the 8-bits of immediate 4 times to help the loop wrap around. Imm = (Imm & 0xff) * 0x01010101; int Indices[32]; for (unsigned l = 0; l != NumElts; l += 8) { for (unsigned i = 0; i != 4; ++i) Indices[l + i] = l + i; for (unsigned i = 4; i != 8; ++i) { Indices[l + i] = l + 4 + (Imm & 3); Imm >>= 2; } } return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), "pshufhw"); } case X86::BI__builtin_ia32_pshufd: case X86::BI__builtin_ia32_pshufd256: case X86::BI__builtin_ia32_pshufd512: case X86::BI__builtin_ia32_vpermilpd: case X86::BI__builtin_ia32_vpermilps: case X86::BI__builtin_ia32_vpermilpd256: case X86::BI__builtin_ia32_vpermilps256: case X86::BI__builtin_ia32_vpermilpd512: case X86::BI__builtin_ia32_vpermilps512: { uint32_t Imm = cast(Ops[1])->getZExtValue(); auto *Ty = cast(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; unsigned NumLaneElts = NumElts / NumLanes; // Splat the 8-bits of immediate 4 times to help the loop wrap around. Imm = (Imm & 0xff) * 0x01010101; int Indices[16]; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { Indices[i + l] = (Imm % NumLaneElts) + l; Imm /= NumLaneElts; } } return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), "permil"); } case X86::BI__builtin_ia32_shufpd: case X86::BI__builtin_ia32_shufpd256: case X86::BI__builtin_ia32_shufpd512: case X86::BI__builtin_ia32_shufps: case X86::BI__builtin_ia32_shufps256: case X86::BI__builtin_ia32_shufps512: { uint32_t Imm = cast(Ops[2])->getZExtValue(); auto *Ty = cast(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128; unsigned NumLaneElts = NumElts / NumLanes; // Splat the 8-bits of immediate 4 times to help the loop wrap around. 
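    // e.g. an immediate of 0xB1 becomes 0xB1B1B1B1, giving every 128-bit lane
    // its own copy of the selector bits to consume.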
Imm = (Imm & 0xff) * 0x01010101; int Indices[16]; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { unsigned Index = Imm % NumLaneElts; Imm /= NumLaneElts; if (i >= (NumLaneElts / 2)) Index += NumElts; Indices[l + i] = l + Index; } } return Builder.CreateShuffleVector(Ops[0], Ops[1], ArrayRef(Indices, NumElts), "shufp"); } case X86::BI__builtin_ia32_permdi256: case X86::BI__builtin_ia32_permdf256: case X86::BI__builtin_ia32_permdi512: case X86::BI__builtin_ia32_permdf512: { unsigned Imm = cast(Ops[1])->getZExtValue(); auto *Ty = cast(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); // These intrinsics operate on 256-bit lanes of four 64-bit elements. int Indices[8]; for (unsigned l = 0; l != NumElts; l += 4) for (unsigned i = 0; i != 4; ++i) Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3); return Builder.CreateShuffleVector(Ops[0], ArrayRef(Indices, NumElts), "perm"); } case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: case X86::BI__builtin_ia32_palignr512: { unsigned ShiftVal = cast(Ops[2])->getZExtValue() & 0xff; unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); assert(NumElts % 16 == 0); // If palignr is shifting the pair of vectors more than the size of two // lanes, emit zero. if (ShiftVal >= 32) return llvm::Constant::getNullValue(ConvertType(E->getType())); // If palignr is shifting the pair of input vectors more than one lane, // but less than two lanes, convert to shifting in zeroes. if (ShiftVal > 16) { ShiftVal -= 16; Ops[1] = Ops[0]; Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); } int Indices[64]; // 256-bit palignr operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned i = 0; i != 16; ++i) { unsigned Idx = ShiftVal + i; if (Idx >= 16) Idx += NumElts - 16; // End of lane, switch operand. Indices[l + i] = Idx + l; } } return Builder.CreateShuffleVector(Ops[1], Ops[0], ArrayRef(Indices, NumElts), "palignr"); } case X86::BI__builtin_ia32_alignd128: case X86::BI__builtin_ia32_alignd256: case X86::BI__builtin_ia32_alignd512: case X86::BI__builtin_ia32_alignq128: case X86::BI__builtin_ia32_alignq256: case X86::BI__builtin_ia32_alignq512: { unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); unsigned ShiftVal = cast(Ops[2])->getZExtValue() & 0xff; // Mask the shift amount to width of a vector. ShiftVal &= NumElts - 1; int Indices[16]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i + ShiftVal; return Builder.CreateShuffleVector(Ops[1], Ops[0], ArrayRef(Indices, NumElts), "valign"); } case X86::BI__builtin_ia32_shuf_f32x4_256: case X86::BI__builtin_ia32_shuf_f64x2_256: case X86::BI__builtin_ia32_shuf_i32x4_256: case X86::BI__builtin_ia32_shuf_i64x2_256: case X86::BI__builtin_ia32_shuf_f32x4: case X86::BI__builtin_ia32_shuf_f64x2: case X86::BI__builtin_ia32_shuf_i32x4: case X86::BI__builtin_ia32_shuf_i64x2: { unsigned Imm = cast(Ops[2])->getZExtValue(); auto *Ty = cast(Ops[0]->getType()); unsigned NumElts = Ty->getNumElements(); unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2; unsigned NumLaneElts = NumElts / NumLanes; int Indices[16]; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { unsigned Index = (Imm % NumLanes) * NumLaneElts; Imm /= NumLanes; // Discard the bits we just used. if (l >= (NumElts / 2)) Index += NumElts; // Switch to other source. 
for (unsigned i = 0; i != NumLaneElts; ++i) { Indices[l + i] = Index + i; } } return Builder.CreateShuffleVector(Ops[0], Ops[1], ArrayRef(Indices, NumElts), "shuf"); } case X86::BI__builtin_ia32_vperm2f128_pd256: case X86::BI__builtin_ia32_vperm2f128_ps256: case X86::BI__builtin_ia32_vperm2f128_si256: case X86::BI__builtin_ia32_permti256: { unsigned Imm = cast(Ops[2])->getZExtValue(); unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); // This takes a very simple approach since there are two lanes and a // shuffle can have 2 inputs. So we reserve the first input for the first // lane and the second input for the second lane. This may result in // duplicate sources, but this can be dealt with in the backend. Value *OutOps[2]; int Indices[8]; for (unsigned l = 0; l != 2; ++l) { // Determine the source for this lane. if (Imm & (1 << ((l * 4) + 3))) OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType()); else if (Imm & (1 << ((l * 4) + 1))) OutOps[l] = Ops[1]; else OutOps[l] = Ops[0]; for (unsigned i = 0; i != NumElts/2; ++i) { // Start with ith element of the source for this lane. unsigned Idx = (l * NumElts) + i; // If bit 0 of the immediate half is set, switch to the high half of // the source. if (Imm & (1 << (l * 4))) Idx += NumElts/2; Indices[(l * (NumElts/2)) + i] = Idx; } } return Builder.CreateShuffleVector(OutOps[0], OutOps[1], ArrayRef(Indices, NumElts), "vperm"); } case X86::BI__builtin_ia32_pslldqi128_byteshift: case X86::BI__builtin_ia32_pslldqi256_byteshift: case X86::BI__builtin_ia32_pslldqi512_byteshift: { unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff; auto *ResultType = cast(Ops[0]->getType()); // Builtin type is vXi64 so multiply by 8 to get bytes. unsigned NumElts = ResultType->getNumElements() * 8; // If pslldq is shifting the vector more than 15 bytes, emit zero. if (ShiftVal >= 16) return llvm::Constant::getNullValue(ResultType); int Indices[64]; // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned i = 0; i != 16; ++i) { unsigned Idx = NumElts + i - ShiftVal; if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand. Indices[l + i] = Idx + l; } } auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts); Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); Value *Zero = llvm::Constant::getNullValue(VecTy); Value *SV = Builder.CreateShuffleVector( Zero, Cast, ArrayRef(Indices, NumElts), "pslldq"); return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast"); } case X86::BI__builtin_ia32_psrldqi128_byteshift: case X86::BI__builtin_ia32_psrldqi256_byteshift: case X86::BI__builtin_ia32_psrldqi512_byteshift: { unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff; auto *ResultType = cast(Ops[0]->getType()); // Builtin type is vXi64 so multiply by 8 to get bytes. unsigned NumElts = ResultType->getNumElements() * 8; // If psrldq is shifting the vector more than 15 bytes, emit zero. if (ShiftVal >= 16) return llvm::Constant::getNullValue(ResultType); int Indices[64]; // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l != NumElts; l += 16) { for (unsigned i = 0; i != 16; ++i) { unsigned Idx = i + ShiftVal; if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand. 
Indices[l + i] = Idx + l; } } auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts); Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast"); Value *Zero = llvm::Constant::getNullValue(VecTy); Value *SV = Builder.CreateShuffleVector( Cast, Zero, ArrayRef(Indices, NumElts), "psrldq"); return Builder.CreateBitCast(SV, ResultType, "cast"); } case X86::BI__builtin_ia32_kshiftliqi: case X86::BI__builtin_ia32_kshiftlihi: case X86::BI__builtin_ia32_kshiftlisi: case X86::BI__builtin_ia32_kshiftlidi: { unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff; unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); if (ShiftVal >= NumElts) return llvm::Constant::getNullValue(Ops[0]->getType()); Value *In = getMaskVecValue(*this, Ops[0], NumElts); int Indices[64]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = NumElts + i - ShiftVal; Value *Zero = llvm::Constant::getNullValue(In->getType()); Value *SV = Builder.CreateShuffleVector( Zero, In, ArrayRef(Indices, NumElts), "kshiftl"); return Builder.CreateBitCast(SV, Ops[0]->getType()); } case X86::BI__builtin_ia32_kshiftriqi: case X86::BI__builtin_ia32_kshiftrihi: case X86::BI__builtin_ia32_kshiftrisi: case X86::BI__builtin_ia32_kshiftridi: { unsigned ShiftVal = cast(Ops[1])->getZExtValue() & 0xff; unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); if (ShiftVal >= NumElts) return llvm::Constant::getNullValue(Ops[0]->getType()); Value *In = getMaskVecValue(*this, Ops[0], NumElts); int Indices[64]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i + ShiftVal; Value *Zero = llvm::Constant::getNullValue(In->getType()); Value *SV = Builder.CreateShuffleVector( In, Zero, ArrayRef(Indices, NumElts), "kshiftr"); return Builder.CreateBitCast(SV, Ops[0]->getType()); } case X86::BI__builtin_ia32_movnti: case X86::BI__builtin_ia32_movnti64: case X86::BI__builtin_ia32_movntsd: case X86::BI__builtin_ia32_movntss: { llvm::MDNode *Node = llvm::MDNode::get( getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); Value *Ptr = Ops[0]; Value *Src = Ops[1]; // Extract the 0'th element of the source vector. if (BuiltinID == X86::BI__builtin_ia32_movntsd || BuiltinID == X86::BI__builtin_ia32_movntss) Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract"); // Unaligned nontemporal store of the scalar value. StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, Ptr); SI->setMetadata(llvm::LLVMContext::MD_nontemporal, Node); SI->setAlignment(llvm::Align(1)); return SI; } // Rotate is a special case of funnel shift - 1st 2 args are the same. 
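  // rotl(x, s) == fshl(x, x, s) and rotr(x, s) == fshr(x, x, s), so the
  // funnel-shift helper below is reused with the source passed twice.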
case X86::BI__builtin_ia32_vprotb: case X86::BI__builtin_ia32_vprotw: case X86::BI__builtin_ia32_vprotd: case X86::BI__builtin_ia32_vprotq: case X86::BI__builtin_ia32_vprotbi: case X86::BI__builtin_ia32_vprotwi: case X86::BI__builtin_ia32_vprotdi: case X86::BI__builtin_ia32_vprotqi: case X86::BI__builtin_ia32_prold128: case X86::BI__builtin_ia32_prold256: case X86::BI__builtin_ia32_prold512: case X86::BI__builtin_ia32_prolq128: case X86::BI__builtin_ia32_prolq256: case X86::BI__builtin_ia32_prolq512: case X86::BI__builtin_ia32_prolvd128: case X86::BI__builtin_ia32_prolvd256: case X86::BI__builtin_ia32_prolvd512: case X86::BI__builtin_ia32_prolvq128: case X86::BI__builtin_ia32_prolvq256: case X86::BI__builtin_ia32_prolvq512: return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false); case X86::BI__builtin_ia32_prord128: case X86::BI__builtin_ia32_prord256: case X86::BI__builtin_ia32_prord512: case X86::BI__builtin_ia32_prorq128: case X86::BI__builtin_ia32_prorq256: case X86::BI__builtin_ia32_prorq512: case X86::BI__builtin_ia32_prorvd128: case X86::BI__builtin_ia32_prorvd256: case X86::BI__builtin_ia32_prorvd512: case X86::BI__builtin_ia32_prorvq128: case X86::BI__builtin_ia32_prorvq256: case X86::BI__builtin_ia32_prorvq512: return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true); case X86::BI__builtin_ia32_selectb_128: case X86::BI__builtin_ia32_selectb_256: case X86::BI__builtin_ia32_selectb_512: case X86::BI__builtin_ia32_selectw_128: case X86::BI__builtin_ia32_selectw_256: case X86::BI__builtin_ia32_selectw_512: case X86::BI__builtin_ia32_selectd_128: case X86::BI__builtin_ia32_selectd_256: case X86::BI__builtin_ia32_selectd_512: case X86::BI__builtin_ia32_selectq_128: case X86::BI__builtin_ia32_selectq_256: case X86::BI__builtin_ia32_selectq_512: case X86::BI__builtin_ia32_selectph_128: case X86::BI__builtin_ia32_selectph_256: case X86::BI__builtin_ia32_selectph_512: case X86::BI__builtin_ia32_selectpbf_128: case X86::BI__builtin_ia32_selectpbf_256: case X86::BI__builtin_ia32_selectpbf_512: case X86::BI__builtin_ia32_selectps_128: case X86::BI__builtin_ia32_selectps_256: case X86::BI__builtin_ia32_selectps_512: case X86::BI__builtin_ia32_selectpd_128: case X86::BI__builtin_ia32_selectpd_256: case X86::BI__builtin_ia32_selectpd_512: return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); case X86::BI__builtin_ia32_selectsh_128: case X86::BI__builtin_ia32_selectsbf_128: case X86::BI__builtin_ia32_selectss_128: case X86::BI__builtin_ia32_selectsd_128: { Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0); A = EmitX86ScalarSelect(*this, Ops[0], A, B); return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0); } case X86::BI__builtin_ia32_cmpb128_mask: case X86::BI__builtin_ia32_cmpb256_mask: case X86::BI__builtin_ia32_cmpb512_mask: case X86::BI__builtin_ia32_cmpw128_mask: case X86::BI__builtin_ia32_cmpw256_mask: case X86::BI__builtin_ia32_cmpw512_mask: case X86::BI__builtin_ia32_cmpd128_mask: case X86::BI__builtin_ia32_cmpd256_mask: case X86::BI__builtin_ia32_cmpd512_mask: case X86::BI__builtin_ia32_cmpq128_mask: case X86::BI__builtin_ia32_cmpq256_mask: case X86::BI__builtin_ia32_cmpq512_mask: { unsigned CC = cast(Ops[2])->getZExtValue() & 0x7; return EmitX86MaskedCompare(*this, CC, true, Ops); } case X86::BI__builtin_ia32_ucmpb128_mask: case X86::BI__builtin_ia32_ucmpb256_mask: case X86::BI__builtin_ia32_ucmpb512_mask: case X86::BI__builtin_ia32_ucmpw128_mask: case X86::BI__builtin_ia32_ucmpw256_mask: case 
X86::BI__builtin_ia32_ucmpw512_mask: case X86::BI__builtin_ia32_ucmpd128_mask: case X86::BI__builtin_ia32_ucmpd256_mask: case X86::BI__builtin_ia32_ucmpd512_mask: case X86::BI__builtin_ia32_ucmpq128_mask: case X86::BI__builtin_ia32_ucmpq256_mask: case X86::BI__builtin_ia32_ucmpq512_mask: { unsigned CC = cast(Ops[2])->getZExtValue() & 0x7; return EmitX86MaskedCompare(*this, CC, false, Ops); } case X86::BI__builtin_ia32_vpcomb: case X86::BI__builtin_ia32_vpcomw: case X86::BI__builtin_ia32_vpcomd: case X86::BI__builtin_ia32_vpcomq: return EmitX86vpcom(*this, Ops, true); case X86::BI__builtin_ia32_vpcomub: case X86::BI__builtin_ia32_vpcomuw: case X86::BI__builtin_ia32_vpcomud: case X86::BI__builtin_ia32_vpcomuq: return EmitX86vpcom(*this, Ops, false); case X86::BI__builtin_ia32_kortestcqi: case X86::BI__builtin_ia32_kortestchi: case X86::BI__builtin_ia32_kortestcsi: case X86::BI__builtin_ia32_kortestcdi: { Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType()); Value *Cmp = Builder.CreateICmpEQ(Or, C); return Builder.CreateZExt(Cmp, ConvertType(E->getType())); } case X86::BI__builtin_ia32_kortestzqi: case X86::BI__builtin_ia32_kortestzhi: case X86::BI__builtin_ia32_kortestzsi: case X86::BI__builtin_ia32_kortestzdi: { Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops); Value *C = llvm::Constant::getNullValue(Ops[0]->getType()); Value *Cmp = Builder.CreateICmpEQ(Or, C); return Builder.CreateZExt(Cmp, ConvertType(E->getType())); } case X86::BI__builtin_ia32_ktestcqi: case X86::BI__builtin_ia32_ktestzqi: case X86::BI__builtin_ia32_ktestchi: case X86::BI__builtin_ia32_ktestzhi: case X86::BI__builtin_ia32_ktestcsi: case X86::BI__builtin_ia32_ktestzsi: case X86::BI__builtin_ia32_ktestcdi: case X86::BI__builtin_ia32_ktestzdi: { Intrinsic::ID IID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_ktestcqi: IID = Intrinsic::x86_avx512_ktestc_b; break; case X86::BI__builtin_ia32_ktestzqi: IID = Intrinsic::x86_avx512_ktestz_b; break; case X86::BI__builtin_ia32_ktestchi: IID = Intrinsic::x86_avx512_ktestc_w; break; case X86::BI__builtin_ia32_ktestzhi: IID = Intrinsic::x86_avx512_ktestz_w; break; case X86::BI__builtin_ia32_ktestcsi: IID = Intrinsic::x86_avx512_ktestc_d; break; case X86::BI__builtin_ia32_ktestzsi: IID = Intrinsic::x86_avx512_ktestz_d; break; case X86::BI__builtin_ia32_ktestcdi: IID = Intrinsic::x86_avx512_ktestc_q; break; case X86::BI__builtin_ia32_ktestzdi: IID = Intrinsic::x86_avx512_ktestz_q; break; } unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); Function *Intr = CGM.getIntrinsic(IID); return Builder.CreateCall(Intr, {LHS, RHS}); } case X86::BI__builtin_ia32_kaddqi: case X86::BI__builtin_ia32_kaddhi: case X86::BI__builtin_ia32_kaddsi: case X86::BI__builtin_ia32_kadddi: { Intrinsic::ID IID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_kaddqi: IID = Intrinsic::x86_avx512_kadd_b; break; case X86::BI__builtin_ia32_kaddhi: IID = Intrinsic::x86_avx512_kadd_w; break; case X86::BI__builtin_ia32_kaddsi: IID = Intrinsic::x86_avx512_kadd_d; break; case X86::BI__builtin_ia32_kadddi: IID = Intrinsic::x86_avx512_kadd_q; break; } unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); Function 
*Intr = CGM.getIntrinsic(IID); Value *Res = Builder.CreateCall(Intr, {LHS, RHS}); return Builder.CreateBitCast(Res, Ops[0]->getType()); } case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: case X86::BI__builtin_ia32_kanddi: return EmitX86MaskLogic(*this, Instruction::And, Ops); case X86::BI__builtin_ia32_kandnqi: case X86::BI__builtin_ia32_kandnhi: case X86::BI__builtin_ia32_kandnsi: case X86::BI__builtin_ia32_kandndi: return EmitX86MaskLogic(*this, Instruction::And, Ops, true); case X86::BI__builtin_ia32_korqi: case X86::BI__builtin_ia32_korhi: case X86::BI__builtin_ia32_korsi: case X86::BI__builtin_ia32_kordi: return EmitX86MaskLogic(*this, Instruction::Or, Ops); case X86::BI__builtin_ia32_kxnorqi: case X86::BI__builtin_ia32_kxnorhi: case X86::BI__builtin_ia32_kxnorsi: case X86::BI__builtin_ia32_kxnordi: return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true); case X86::BI__builtin_ia32_kxorqi: case X86::BI__builtin_ia32_kxorhi: case X86::BI__builtin_ia32_kxorsi: case X86::BI__builtin_ia32_kxordi: return EmitX86MaskLogic(*this, Instruction::Xor, Ops); case X86::BI__builtin_ia32_knotqi: case X86::BI__builtin_ia32_knothi: case X86::BI__builtin_ia32_knotsi: case X86::BI__builtin_ia32_knotdi: { unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *Res = getMaskVecValue(*this, Ops[0], NumElts); return Builder.CreateBitCast(Builder.CreateNot(Res), Ops[0]->getType()); } case X86::BI__builtin_ia32_kmovb: case X86::BI__builtin_ia32_kmovw: case X86::BI__builtin_ia32_kmovd: case X86::BI__builtin_ia32_kmovq: { // Bitcast to vXi1 type and then back to integer. This gets the mask // register type into the IR, but might be optimized out depending on // what's around it. unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *Res = getMaskVecValue(*this, Ops[0], NumElts); return Builder.CreateBitCast(Res, Ops[0]->getType()); } case X86::BI__builtin_ia32_kunpckdi: case X86::BI__builtin_ia32_kunpcksi: case X86::BI__builtin_ia32_kunpckhi: { unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth(); Value *LHS = getMaskVecValue(*this, Ops[0], NumElts); Value *RHS = getMaskVecValue(*this, Ops[1], NumElts); int Indices[64]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; // First extract half of each vector. This gives better codegen than // doing it in a single shuffle. LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2)); RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2)); // Concat the vectors. // NOTE: Operands are swapped to match the intrinsic definition. 
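    // With the operands in this order the low half of the result comes from
    // Ops[1] and the high half from Ops[0].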
Value *Res = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts)); return Builder.CreateBitCast(Res, Ops[0]->getType()); } case X86::BI__builtin_ia32_vplzcntd_128: case X86::BI__builtin_ia32_vplzcntd_256: case X86::BI__builtin_ia32_vplzcntd_512: case X86::BI__builtin_ia32_vplzcntq_128: case X86::BI__builtin_ia32_vplzcntq_256: case X86::BI__builtin_ia32_vplzcntq_512: { Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}); } case X86::BI__builtin_ia32_sqrtss: case X86::BI__builtin_ia32_sqrtsd: { Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); Function *F; if (Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, A->getType()); A = Builder.CreateConstrainedFPCall(F, {A}); } else { F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); A = Builder.CreateCall(F, {A}); } return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); } case X86::BI__builtin_ia32_sqrtsh_round_mask: case X86::BI__builtin_ia32_sqrtsd_round_mask: case X86::BI__builtin_ia32_sqrtss_round_mask: { unsigned CC = cast(Ops[4])->getZExtValue(); // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), // otherwise keep the intrinsic. if (CC != 4) { Intrinsic::ID IID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_sqrtsh_round_mask: IID = Intrinsic::x86_avx512fp16_mask_sqrt_sh; break; case X86::BI__builtin_ia32_sqrtsd_round_mask: IID = Intrinsic::x86_avx512_mask_sqrt_sd; break; case X86::BI__builtin_ia32_sqrtss_round_mask: IID = Intrinsic::x86_avx512_mask_sqrt_ss; break; } return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); Function *F; if (Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, A->getType()); A = Builder.CreateConstrainedFPCall(F, A); } else { F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); A = Builder.CreateCall(F, A); } Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0); A = EmitX86ScalarSelect(*this, Ops[3], A, Src); return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); } case X86::BI__builtin_ia32_sqrtpd256: case X86::BI__builtin_ia32_sqrtpd: case X86::BI__builtin_ia32_sqrtps256: case X86::BI__builtin_ia32_sqrtps: case X86::BI__builtin_ia32_sqrtph256: case X86::BI__builtin_ia32_sqrtph: case X86::BI__builtin_ia32_sqrtph512: case X86::BI__builtin_ia32_sqrtps512: case X86::BI__builtin_ia32_sqrtpd512: { if (Ops.size() == 2) { unsigned CC = cast(Ops[1])->getZExtValue(); // Support only if the rounding mode is 4 (AKA CUR_DIRECTION), // otherwise keep the intrinsic. 
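      // (4 is _MM_FROUND_CUR_DIRECTION; any other value requests embedded
      // rounding/SAE, which the generic sqrt intrinsic cannot express.)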
if (CC != 4) { Intrinsic::ID IID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_sqrtph512: IID = Intrinsic::x86_avx512fp16_sqrt_ph_512; break; case X86::BI__builtin_ia32_sqrtps512: IID = Intrinsic::x86_avx512_sqrt_ps_512; break; case X86::BI__builtin_ia32_sqrtpd512: IID = Intrinsic::x86_avx512_sqrt_pd_512; break; } return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } } if (Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, Ops[0]->getType()); return Builder.CreateConstrainedFPCall(F, Ops[0]); } else { Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType()); return Builder.CreateCall(F, Ops[0]); } } case X86::BI__builtin_ia32_pmuludq128: case X86::BI__builtin_ia32_pmuludq256: case X86::BI__builtin_ia32_pmuludq512: return EmitX86Muldq(*this, /*IsSigned*/false, Ops); case X86::BI__builtin_ia32_pmuldq128: case X86::BI__builtin_ia32_pmuldq256: case X86::BI__builtin_ia32_pmuldq512: return EmitX86Muldq(*this, /*IsSigned*/true, Ops); case X86::BI__builtin_ia32_pternlogd512_mask: case X86::BI__builtin_ia32_pternlogq512_mask: case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogq128_mask: case X86::BI__builtin_ia32_pternlogq256_mask: return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops); case X86::BI__builtin_ia32_pternlogd512_maskz: case X86::BI__builtin_ia32_pternlogq512_maskz: case X86::BI__builtin_ia32_pternlogd128_maskz: case X86::BI__builtin_ia32_pternlogd256_maskz: case X86::BI__builtin_ia32_pternlogq128_maskz: case X86::BI__builtin_ia32_pternlogq256_maskz: return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops); case X86::BI__builtin_ia32_vpshldd128: case X86::BI__builtin_ia32_vpshldd256: case X86::BI__builtin_ia32_vpshldd512: case X86::BI__builtin_ia32_vpshldq128: case X86::BI__builtin_ia32_vpshldq256: case X86::BI__builtin_ia32_vpshldq512: case X86::BI__builtin_ia32_vpshldw128: case X86::BI__builtin_ia32_vpshldw256: case X86::BI__builtin_ia32_vpshldw512: return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); case X86::BI__builtin_ia32_vpshrdd128: case X86::BI__builtin_ia32_vpshrdd256: case X86::BI__builtin_ia32_vpshrdd512: case X86::BI__builtin_ia32_vpshrdq128: case X86::BI__builtin_ia32_vpshrdq256: case X86::BI__builtin_ia32_vpshrdq512: case X86::BI__builtin_ia32_vpshrdw128: case X86::BI__builtin_ia32_vpshrdw256: case X86::BI__builtin_ia32_vpshrdw512: // Ops 0 and 1 are swapped. return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); case X86::BI__builtin_ia32_vpshldvd128: case X86::BI__builtin_ia32_vpshldvd256: case X86::BI__builtin_ia32_vpshldvd512: case X86::BI__builtin_ia32_vpshldvq128: case X86::BI__builtin_ia32_vpshldvq256: case X86::BI__builtin_ia32_vpshldvq512: case X86::BI__builtin_ia32_vpshldvw128: case X86::BI__builtin_ia32_vpshldvw256: case X86::BI__builtin_ia32_vpshldvw512: return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false); case X86::BI__builtin_ia32_vpshrdvd128: case X86::BI__builtin_ia32_vpshrdvd256: case X86::BI__builtin_ia32_vpshrdvd512: case X86::BI__builtin_ia32_vpshrdvq128: case X86::BI__builtin_ia32_vpshrdvq256: case X86::BI__builtin_ia32_vpshrdvq512: case X86::BI__builtin_ia32_vpshrdvw128: case X86::BI__builtin_ia32_vpshrdvw256: case X86::BI__builtin_ia32_vpshrdvw512: // Ops 0 and 1 are swapped. 
return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true); // Reductions case X86::BI__builtin_ia32_reduce_fadd_pd512: case X86::BI__builtin_ia32_reduce_fadd_ps512: case X86::BI__builtin_ia32_reduce_fadd_ph512: case X86::BI__builtin_ia32_reduce_fadd_ph256: case X86::BI__builtin_ia32_reduce_fadd_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType()); IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.getFastMathFlags().setAllowReassoc(); return Builder.CreateCall(F, {Ops[0], Ops[1]}); } case X86::BI__builtin_ia32_reduce_fmul_pd512: case X86::BI__builtin_ia32_reduce_fmul_ps512: case X86::BI__builtin_ia32_reduce_fmul_ph512: case X86::BI__builtin_ia32_reduce_fmul_ph256: case X86::BI__builtin_ia32_reduce_fmul_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType()); IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.getFastMathFlags().setAllowReassoc(); return Builder.CreateCall(F, {Ops[0], Ops[1]}); } case X86::BI__builtin_ia32_reduce_fmax_pd512: case X86::BI__builtin_ia32_reduce_fmax_ps512: case X86::BI__builtin_ia32_reduce_fmax_ph512: case X86::BI__builtin_ia32_reduce_fmax_ph256: case X86::BI__builtin_ia32_reduce_fmax_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmax, Ops[0]->getType()); IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.getFastMathFlags().setNoNaNs(); return Builder.CreateCall(F, {Ops[0]}); } case X86::BI__builtin_ia32_reduce_fmin_pd512: case X86::BI__builtin_ia32_reduce_fmin_ps512: case X86::BI__builtin_ia32_reduce_fmin_ph512: case X86::BI__builtin_ia32_reduce_fmin_ph256: case X86::BI__builtin_ia32_reduce_fmin_ph128: { Function *F = CGM.getIntrinsic(Intrinsic::vector_reduce_fmin, Ops[0]->getType()); IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); Builder.getFastMathFlags().setNoNaNs(); return Builder.CreateCall(F, {Ops[0]}); } case X86::BI__builtin_ia32_rdrand16_step: case X86::BI__builtin_ia32_rdrand32_step: case X86::BI__builtin_ia32_rdrand64_step: case X86::BI__builtin_ia32_rdseed16_step: case X86::BI__builtin_ia32_rdseed32_step: case X86::BI__builtin_ia32_rdseed64_step: { Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_rdrand16_step: ID = Intrinsic::x86_rdrand_16; break; case X86::BI__builtin_ia32_rdrand32_step: ID = Intrinsic::x86_rdrand_32; break; case X86::BI__builtin_ia32_rdrand64_step: ID = Intrinsic::x86_rdrand_64; break; case X86::BI__builtin_ia32_rdseed16_step: ID = Intrinsic::x86_rdseed_16; break; case X86::BI__builtin_ia32_rdseed32_step: ID = Intrinsic::x86_rdseed_32; break; case X86::BI__builtin_ia32_rdseed64_step: ID = Intrinsic::x86_rdseed_64; break; } Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID)); Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0), Ops[0]); return Builder.CreateExtractValue(Call, 1); } case X86::BI__builtin_ia32_addcarryx_u32: case X86::BI__builtin_ia32_addcarryx_u64: case X86::BI__builtin_ia32_subborrow_u32: case X86::BI__builtin_ia32_subborrow_u64: { Intrinsic::ID IID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_addcarryx_u32: IID = Intrinsic::x86_addcarry_32; break; case X86::BI__builtin_ia32_addcarryx_u64: IID = Intrinsic::x86_addcarry_64; break; case X86::BI__builtin_ia32_subborrow_u32: IID = Intrinsic::x86_subborrow_32; break; case X86::BI__builtin_ia32_subborrow_u64: IID = Intrinsic::x86_subborrow_64; break; } Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), 
{ Ops[0], Ops[1], Ops[2] }); Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1), Ops[3]); return Builder.CreateExtractValue(Call, 0); } case X86::BI__builtin_ia32_fpclassps128_mask: case X86::BI__builtin_ia32_fpclassps256_mask: case X86::BI__builtin_ia32_fpclassps512_mask: case X86::BI__builtin_ia32_fpclassph128_mask: case X86::BI__builtin_ia32_fpclassph256_mask: case X86::BI__builtin_ia32_fpclassph512_mask: case X86::BI__builtin_ia32_fpclasspd128_mask: case X86::BI__builtin_ia32_fpclasspd256_mask: case X86::BI__builtin_ia32_fpclasspd512_mask: { unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); Value *MaskIn = Ops[2]; Ops.erase(&Ops[2]); Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_fpclassph128_mask: ID = Intrinsic::x86_avx512fp16_fpclass_ph_128; break; case X86::BI__builtin_ia32_fpclassph256_mask: ID = Intrinsic::x86_avx512fp16_fpclass_ph_256; break; case X86::BI__builtin_ia32_fpclassph512_mask: ID = Intrinsic::x86_avx512fp16_fpclass_ph_512; break; case X86::BI__builtin_ia32_fpclassps128_mask: ID = Intrinsic::x86_avx512_fpclass_ps_128; break; case X86::BI__builtin_ia32_fpclassps256_mask: ID = Intrinsic::x86_avx512_fpclass_ps_256; break; case X86::BI__builtin_ia32_fpclassps512_mask: ID = Intrinsic::x86_avx512_fpclass_ps_512; break; case X86::BI__builtin_ia32_fpclasspd128_mask: ID = Intrinsic::x86_avx512_fpclass_pd_128; break; case X86::BI__builtin_ia32_fpclasspd256_mask: ID = Intrinsic::x86_avx512_fpclass_pd_256; break; case X86::BI__builtin_ia32_fpclasspd512_mask: ID = Intrinsic::x86_avx512_fpclass_pd_512; break; } Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops); return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn); } case X86::BI__builtin_ia32_vp2intersect_q_512: case X86::BI__builtin_ia32_vp2intersect_q_256: case X86::BI__builtin_ia32_vp2intersect_q_128: case X86::BI__builtin_ia32_vp2intersect_d_512: case X86::BI__builtin_ia32_vp2intersect_d_256: case X86::BI__builtin_ia32_vp2intersect_d_128: { unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_vp2intersect_q_512: ID = Intrinsic::x86_avx512_vp2intersect_q_512; break; case X86::BI__builtin_ia32_vp2intersect_q_256: ID = Intrinsic::x86_avx512_vp2intersect_q_256; break; case X86::BI__builtin_ia32_vp2intersect_q_128: ID = Intrinsic::x86_avx512_vp2intersect_q_128; break; case X86::BI__builtin_ia32_vp2intersect_d_512: ID = Intrinsic::x86_avx512_vp2intersect_d_512; break; case X86::BI__builtin_ia32_vp2intersect_d_256: ID = Intrinsic::x86_avx512_vp2intersect_d_256; break; case X86::BI__builtin_ia32_vp2intersect_d_128: ID = Intrinsic::x86_avx512_vp2intersect_d_128; break; } Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]}); Value *Result = Builder.CreateExtractValue(Call, 0); Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); Builder.CreateDefaultAlignedStore(Result, Ops[2]); Result = Builder.CreateExtractValue(Call, 1); Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr); return Builder.CreateDefaultAlignedStore(Result, Ops[3]); } case X86::BI__builtin_ia32_vpmultishiftqb128: case X86::BI__builtin_ia32_vpmultishiftqb256: case X86::BI__builtin_ia32_vpmultishiftqb512: { Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_vpmultishiftqb128: ID = 
      Intrinsic::x86_avx512_pmultishift_qb_128;
      break;
    case X86::BI__builtin_ia32_vpmultishiftqb256:
      ID = Intrinsic::x86_avx512_pmultishift_qb_256;
      break;
    case X86::BI__builtin_ia32_vpmultishiftqb512:
      ID = Intrinsic::x86_avx512_pmultishift_qb_512;
      break;
    }

    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
  }

  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
  case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
    unsigned NumElts =
        cast<llvm::FixedVectorType>(Ops[0]->getType())->getNumElements();
    Value *MaskIn = Ops[2];
    Ops.erase(&Ops[2]);

    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
      break;
    case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
      break;
    case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
      ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
      break;
    }

    Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
    return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
  }

  // packed comparison intrinsics
  case X86::BI__builtin_ia32_cmpeqps:
  case X86::BI__builtin_ia32_cmpeqpd:
    return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpltps:
  case X86::BI__builtin_ia32_cmpltpd:
    return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpleps:
  case X86::BI__builtin_ia32_cmplepd:
    return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpunordps:
  case X86::BI__builtin_ia32_cmpunordpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpneqps:
  case X86::BI__builtin_ia32_cmpneqpd:
    return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpnltps:
  case X86::BI__builtin_ia32_cmpnltpd:
    return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpnleps:
  case X86::BI__builtin_ia32_cmpnlepd:
    return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
  case X86::BI__builtin_ia32_cmpordps:
  case X86::BI__builtin_ia32_cmpordpd:
    return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
  case X86::BI__builtin_ia32_cmpph128_mask:
  case X86::BI__builtin_ia32_cmpph256_mask:
  case X86::BI__builtin_ia32_cmpph512_mask:
  case X86::BI__builtin_ia32_cmpps128_mask:
  case X86::BI__builtin_ia32_cmpps256_mask:
  case X86::BI__builtin_ia32_cmpps512_mask:
  case X86::BI__builtin_ia32_cmppd128_mask:
  case X86::BI__builtin_ia32_cmppd256_mask:
  case X86::BI__builtin_ia32_cmppd512_mask:
    IsMaskFCmp = true;
    [[fallthrough]];
  case X86::BI__builtin_ia32_cmpps:
  case X86::BI__builtin_ia32_cmpps256:
  case X86::BI__builtin_ia32_cmppd:
  case X86::BI__builtin_ia32_cmppd256: {
    // Lowering vector comparisons to fcmp instructions, while
    // ignoring the requested signalling behaviour and rounding mode.
    // This is only possible if fp-model is not strict and FENV_ACCESS is off.

    // The third argument is the comparison condition, an integer in the
    // range [0, 31].
    unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;

    // Lowering to IR fcmp instruction.
    // Ignoring requested signaling behaviour,
    // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
    FCmpInst::Predicate Pred;
    bool IsSignaling;
    // Predicates for 16-31 repeat the 0-15 predicates. Only the signalling
    // behavior is inverted. We'll handle that after the switch.
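    // For example (sketch, using the usual _CMP_* predicate encodings from
    // avxintrin.h): _CMP_GT_OS (0x0e) maps below to FCMP_OGT with
    // IsSignaling = true, while _CMP_GT_OQ (0x1e) selects the same 0x0e entry
    // and then has its signalling behaviour flipped by the (CC & 0x10) check
    // after the switch.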
switch (CC & 0xf) { case 0x00: Pred = FCmpInst::FCMP_OEQ; IsSignaling = false; break; case 0x01: Pred = FCmpInst::FCMP_OLT; IsSignaling = true; break; case 0x02: Pred = FCmpInst::FCMP_OLE; IsSignaling = true; break; case 0x03: Pred = FCmpInst::FCMP_UNO; IsSignaling = false; break; case 0x04: Pred = FCmpInst::FCMP_UNE; IsSignaling = false; break; case 0x05: Pred = FCmpInst::FCMP_UGE; IsSignaling = true; break; case 0x06: Pred = FCmpInst::FCMP_UGT; IsSignaling = true; break; case 0x07: Pred = FCmpInst::FCMP_ORD; IsSignaling = false; break; case 0x08: Pred = FCmpInst::FCMP_UEQ; IsSignaling = false; break; case 0x09: Pred = FCmpInst::FCMP_ULT; IsSignaling = true; break; case 0x0a: Pred = FCmpInst::FCMP_ULE; IsSignaling = true; break; case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break; case 0x0c: Pred = FCmpInst::FCMP_ONE; IsSignaling = false; break; case 0x0d: Pred = FCmpInst::FCMP_OGE; IsSignaling = true; break; case 0x0e: Pred = FCmpInst::FCMP_OGT; IsSignaling = true; break; case 0x0f: Pred = FCmpInst::FCMP_TRUE; IsSignaling = false; break; default: llvm_unreachable("Unhandled CC"); } // Invert the signalling behavior for 16-31. if (CC & 0x10) IsSignaling = !IsSignaling; // If the predicate is true or false and we're using constrained intrinsics, // we don't have a compare intrinsic we can use. Just use the legacy X86 // specific intrinsic. // If the intrinsic is mask enabled and we're using constrained intrinsics, // use the legacy X86 specific intrinsic. if (Builder.getIsFPConstrained() && (Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE || IsMaskFCmp)) { Intrinsic::ID IID; switch (BuiltinID) { default: llvm_unreachable("Unexpected builtin"); case X86::BI__builtin_ia32_cmpps: IID = Intrinsic::x86_sse_cmp_ps; break; case X86::BI__builtin_ia32_cmpps256: IID = Intrinsic::x86_avx_cmp_ps_256; break; case X86::BI__builtin_ia32_cmppd: IID = Intrinsic::x86_sse2_cmp_pd; break; case X86::BI__builtin_ia32_cmppd256: IID = Intrinsic::x86_avx_cmp_pd_256; break; case X86::BI__builtin_ia32_cmpph128_mask: IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_128; break; case X86::BI__builtin_ia32_cmpph256_mask: IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_256; break; case X86::BI__builtin_ia32_cmpph512_mask: IID = Intrinsic::x86_avx512fp16_mask_cmp_ph_512; break; case X86::BI__builtin_ia32_cmpps512_mask: IID = Intrinsic::x86_avx512_mask_cmp_ps_512; break; case X86::BI__builtin_ia32_cmppd512_mask: IID = Intrinsic::x86_avx512_mask_cmp_pd_512; break; case X86::BI__builtin_ia32_cmpps128_mask: IID = Intrinsic::x86_avx512_mask_cmp_ps_128; break; case X86::BI__builtin_ia32_cmpps256_mask: IID = Intrinsic::x86_avx512_mask_cmp_ps_256; break; case X86::BI__builtin_ia32_cmppd128_mask: IID = Intrinsic::x86_avx512_mask_cmp_pd_128; break; case X86::BI__builtin_ia32_cmppd256_mask: IID = Intrinsic::x86_avx512_mask_cmp_pd_256; break; } Function *Intr = CGM.getIntrinsic(IID); if (IsMaskFCmp) { unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); Ops[3] = getMaskVecValue(*this, Ops[3], NumElts); Value *Cmp = Builder.CreateCall(Intr, Ops); return EmitX86MaskedCompareResult(*this, Cmp, NumElts, nullptr); } return Builder.CreateCall(Intr, Ops); } // Builtins without the _mask suffix return a vector of integers // of the same width as the input vectors if (IsMaskFCmp) { // We ignore SAE if strict FP is disabled. We only keep precise // exception behavior under strict FP. // NOTE: If strict FP does ever go through here a CGFPOptionsRAII // object will be required. 
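    // Rough sketch of this masked path: the fcmp below produces an
    // <NumElts x i1> vector; EmitX86MaskedCompareResult then ANDs in the
    // incoming mask (Ops[3]) and bitcasts the result to the integer mask
    // type that the *_mask builtins return.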
unsigned NumElts = cast(Ops[0]->getType())->getNumElements(); Value *Cmp; if (IsSignaling) Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]); else Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]); } return getVectorFCmpIR(Pred, IsSignaling); } // SSE scalar comparison intrinsics case X86::BI__builtin_ia32_cmpeqss: return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); case X86::BI__builtin_ia32_cmpltss: return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); case X86::BI__builtin_ia32_cmpless: return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); case X86::BI__builtin_ia32_cmpunordss: return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); case X86::BI__builtin_ia32_cmpneqss: return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); case X86::BI__builtin_ia32_cmpnltss: return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); case X86::BI__builtin_ia32_cmpnless: return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); case X86::BI__builtin_ia32_cmpordss: return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); case X86::BI__builtin_ia32_cmpeqsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); case X86::BI__builtin_ia32_cmpltsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); case X86::BI__builtin_ia32_cmplesd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); case X86::BI__builtin_ia32_cmpunordsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); case X86::BI__builtin_ia32_cmpneqsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); case X86::BI__builtin_ia32_cmpnltsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); case X86::BI__builtin_ia32_cmpnlesd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); case X86::BI__builtin_ia32_cmpordsd: return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); // f16c half2float intrinsics case X86::BI__builtin_ia32_vcvtph2ps: case X86::BI__builtin_ia32_vcvtph2ps256: case X86::BI__builtin_ia32_vcvtph2ps_mask: case X86::BI__builtin_ia32_vcvtph2ps256_mask: case X86::BI__builtin_ia32_vcvtph2ps512_mask: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); return EmitX86CvtF16ToFloatExpr(*this, Ops, ConvertType(E->getType())); } // AVX512 bf16 intrinsics case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: { Ops[2] = getMaskVecValue( *this, Ops[2], cast(Ops[0]->getType())->getNumElements()); Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128; return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } case X86::BI__builtin_ia32_cvtsbf162ss_32: return Builder.CreateFPExt(Ops[0], Builder.getFloatTy()); case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: { Intrinsic::ID IID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256; break; case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512; break; } Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]); return EmitX86Select(*this, Ops[2], Res, Ops[1]); } case X86::BI__cpuid: case X86::BI__cpuidex: { Value *FuncId = EmitScalarExpr(E->getArg(1)); Value *SubFuncId = BuiltinID == X86::BI__cpuidex ? 
        EmitScalarExpr(E->getArg(2)) : llvm::ConstantInt::get(Int32Ty, 0);

    llvm::StructType *CpuidRetTy =
        llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);

    StringRef Asm, Constraints;
    if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
      Asm = "cpuid";
      Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
    } else {
      // x86-64 uses %rbx as the base register, so preserve it.
      Asm = "xchgq %rbx, ${1:q}\n"
            "cpuid\n"
            "xchgq %rbx, ${1:q}";
      Constraints = "={ax},=r,={cx},={dx},0,2";
    }

    llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
                                               /*hasSideEffects=*/false);
    Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
    Value *BasePtr = EmitScalarExpr(E->getArg(0));
    Value *Store = nullptr;
    for (unsigned i = 0; i < 4; i++) {
      Value *Extracted = Builder.CreateExtractValue(IACall, i);
      Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
      Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
    }

    // Return the last store instruction to signal that we have emitted the
    // intrinsic.
    return Store;
  }

  case X86::BI__emul:
  case X86::BI__emulu: {
    llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
    bool isSigned = (BuiltinID == X86::BI__emul);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
    return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
  }
  case X86::BI__mulh:
  case X86::BI__umulh:
  case X86::BI_mul128:
  case X86::BI_umul128: {
    llvm::Type *ResType = ConvertType(E->getType());
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);

    bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
    Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
    Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);

    Value *MulResult, *HigherBits;
    if (IsSigned) {
      MulResult = Builder.CreateNSWMul(LHS, RHS);
      HigherBits = Builder.CreateAShr(MulResult, 64);
    } else {
      MulResult = Builder.CreateNUWMul(LHS, RHS);
      HigherBits = Builder.CreateLShr(MulResult, 64);
    }
    HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);

    if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
      return HigherBits;

    Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
    Builder.CreateStore(HigherBits, HighBitsAddress);
    return Builder.CreateIntCast(MulResult, ResType, IsSigned);
  }

  case X86::BI__faststorefence: {
    return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
                               llvm::SyncScope::System);
  }
  case X86::BI__shiftleft128:
  case X86::BI__shiftright128: {
    llvm::Function *F = CGM.getIntrinsic(
        BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
        Int64Ty);
    // Flip low/high ops and zero-extend amount to matching type.
// shiftleft128(Low, High, Amt) -> fshl(High, Low, Amt) // shiftright128(Low, High, Amt) -> fshr(High, Low, Amt) std::swap(Ops[0], Ops[1]); Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); return Builder.CreateCall(F, Ops); } case X86::BI_ReadWriteBarrier: case X86::BI_ReadBarrier: case X86::BI_WriteBarrier: { return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, llvm::SyncScope::SingleThread); } case X86::BI_AddressOfReturnAddress: { Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress, AllocaInt8PtrTy); return Builder.CreateCall(F); } case X86::BI__stosb: { // We treat __stosb as a volatile memset - it may not generate "rep stosb" // instruction, but it will create a memset that won't be optimized away. return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], Align(1), true); } case X86::BI__ud2: // llvm.trap makes a ud2a instruction on x86. return EmitTrapCall(Intrinsic::trap); case X86::BI__int2c: { // This syscall signals a driver assertion failure in x86 NT kernels. llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true); llvm::AttributeList NoReturnAttr = llvm::AttributeList::get( getLLVMContext(), llvm::AttributeList::FunctionIndex, llvm::Attribute::NoReturn); llvm::CallInst *CI = Builder.CreateCall(IA); CI->setAttributes(NoReturnAttr); return CI; } case X86::BI__readfsbyte: case X86::BI__readfsword: case X86::BI__readfsdword: case X86::BI__readfsqword: { llvm::Type *IntTy = ConvertType(E->getType()); Value *Ptr = Builder.CreateIntToPtr( Ops[0], llvm::PointerType::get(getLLVMContext(), 257)); LoadInst *Load = Builder.CreateAlignedLoad( IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); Load->setVolatile(true); return Load; } case X86::BI__readgsbyte: case X86::BI__readgsword: case X86::BI__readgsdword: case X86::BI__readgsqword: { llvm::Type *IntTy = ConvertType(E->getType()); Value *Ptr = Builder.CreateIntToPtr( Ops[0], llvm::PointerType::get(getLLVMContext(), 256)); LoadInst *Load = Builder.CreateAlignedLoad( IntTy, Ptr, getContext().getTypeAlignInChars(E->getType())); Load->setVolatile(true); return Load; } case X86::BI__builtin_ia32_encodekey128_u32: { Intrinsic::ID IID = Intrinsic::x86_encodekey128; Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1]}); for (int i = 0; i < 3; ++i) { Value *Extract = Builder.CreateExtractValue(Call, i + 1); Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[2], i * 16); Builder.CreateAlignedStore(Extract, Ptr, Align(1)); } return Builder.CreateExtractValue(Call, 0); } case X86::BI__builtin_ia32_encodekey256_u32: { Intrinsic::ID IID = Intrinsic::x86_encodekey256; Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[0], Ops[1], Ops[2]}); for (int i = 0; i < 4; ++i) { Value *Extract = Builder.CreateExtractValue(Call, i + 1); Value *Ptr = Builder.CreateConstGEP1_32(Int8Ty, Ops[3], i * 16); Builder.CreateAlignedStore(Extract, Ptr, Align(1)); } return Builder.CreateExtractValue(Call, 0); } case X86::BI__builtin_ia32_aesenc128kl_u8: case X86::BI__builtin_ia32_aesdec128kl_u8: case X86::BI__builtin_ia32_aesenc256kl_u8: case X86::BI__builtin_ia32_aesdec256kl_u8: { Intrinsic::ID IID; StringRef BlockName; switch (BuiltinID) { default: llvm_unreachable("Unexpected builtin"); case X86::BI__builtin_ia32_aesenc128kl_u8: IID = Intrinsic::x86_aesenc128kl; BlockName = "aesenc128kl"; break; case X86::BI__builtin_ia32_aesdec128kl_u8: IID = Intrinsic::x86_aesdec128kl; BlockName = "aesdec128kl"; break; case 
X86::BI__builtin_ia32_aesenc256kl_u8: IID = Intrinsic::x86_aesenc256kl; BlockName = "aesenc256kl"; break; case X86::BI__builtin_ia32_aesdec256kl_u8: IID = Intrinsic::x86_aesdec256kl; BlockName = "aesdec256kl"; break; } Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), {Ops[1], Ops[2]}); BasicBlock *NoError = createBasicBlock(BlockName + "_no_error", this->CurFn); BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn); BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn); Value *Ret = Builder.CreateExtractValue(Call, 0); Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty()); Value *Out = Builder.CreateExtractValue(Call, 1); Builder.CreateCondBr(Succ, NoError, Error); Builder.SetInsertPoint(NoError); Builder.CreateDefaultAlignedStore(Out, Ops[0]); Builder.CreateBr(End); Builder.SetInsertPoint(Error); Constant *Zero = llvm::Constant::getNullValue(Out->getType()); Builder.CreateDefaultAlignedStore(Zero, Ops[0]); Builder.CreateBr(End); Builder.SetInsertPoint(End); return Builder.CreateExtractValue(Call, 0); } case X86::BI__builtin_ia32_aesencwide128kl_u8: case X86::BI__builtin_ia32_aesdecwide128kl_u8: case X86::BI__builtin_ia32_aesencwide256kl_u8: case X86::BI__builtin_ia32_aesdecwide256kl_u8: { Intrinsic::ID IID; StringRef BlockName; switch (BuiltinID) { case X86::BI__builtin_ia32_aesencwide128kl_u8: IID = Intrinsic::x86_aesencwide128kl; BlockName = "aesencwide128kl"; break; case X86::BI__builtin_ia32_aesdecwide128kl_u8: IID = Intrinsic::x86_aesdecwide128kl; BlockName = "aesdecwide128kl"; break; case X86::BI__builtin_ia32_aesencwide256kl_u8: IID = Intrinsic::x86_aesencwide256kl; BlockName = "aesencwide256kl"; break; case X86::BI__builtin_ia32_aesdecwide256kl_u8: IID = Intrinsic::x86_aesdecwide256kl; BlockName = "aesdecwide256kl"; break; } llvm::Type *Ty = FixedVectorType::get(Builder.getInt64Ty(), 2); Value *InOps[9]; InOps[0] = Ops[2]; for (int i = 0; i != 8; ++i) { Value *Ptr = Builder.CreateConstGEP1_32(Ty, Ops[1], i); InOps[i + 1] = Builder.CreateAlignedLoad(Ty, Ptr, Align(16)); } Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), InOps); BasicBlock *NoError = createBasicBlock(BlockName + "_no_error", this->CurFn); BasicBlock *Error = createBasicBlock(BlockName + "_error", this->CurFn); BasicBlock *End = createBasicBlock(BlockName + "_end", this->CurFn); Value *Ret = Builder.CreateExtractValue(Call, 0); Value *Succ = Builder.CreateTrunc(Ret, Builder.getInt1Ty()); Builder.CreateCondBr(Succ, NoError, Error); Builder.SetInsertPoint(NoError); for (int i = 0; i != 8; ++i) { Value *Extract = Builder.CreateExtractValue(Call, i + 1); Value *Ptr = Builder.CreateConstGEP1_32(Extract->getType(), Ops[0], i); Builder.CreateAlignedStore(Extract, Ptr, Align(16)); } Builder.CreateBr(End); Builder.SetInsertPoint(Error); for (int i = 0; i != 8; ++i) { Value *Out = Builder.CreateExtractValue(Call, i + 1); Constant *Zero = llvm::Constant::getNullValue(Out->getType()); Value *Ptr = Builder.CreateConstGEP1_32(Out->getType(), Ops[0], i); Builder.CreateAlignedStore(Zero, Ptr, Align(16)); } Builder.CreateBr(End); Builder.SetInsertPoint(End); return Builder.CreateExtractValue(Call, 0); } case X86::BI__builtin_ia32_vfcmaddcph512_mask: IsConjFMA = true; [[fallthrough]]; case X86::BI__builtin_ia32_vfmaddcph512_mask: { Intrinsic::ID IID = IsConjFMA ? 
Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512 : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512; Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); return EmitX86Select(*this, Ops[3], Call, Ops[0]); } case X86::BI__builtin_ia32_vfcmaddcsh_round_mask: IsConjFMA = true; [[fallthrough]]; case X86::BI__builtin_ia32_vfmaddcsh_round_mask: { Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh : Intrinsic::x86_avx512fp16_mask_vfmadd_csh; Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1)); return EmitX86Select(*this, And, Call, Ops[0]); } case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3: IsConjFMA = true; [[fallthrough]]; case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: { Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh : Intrinsic::x86_avx512fp16_mask_vfmadd_csh; Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops); static constexpr int Mask[] = {0, 5, 6, 7}; return Builder.CreateShuffleVector(Call, Ops[2], Mask); } case X86::BI__builtin_ia32_prefetchi: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::prefetch, Ops[0]->getType()), {Ops[0], llvm::ConstantInt::get(Int32Ty, 0), Ops[1], llvm::ConstantInt::get(Int32Ty, 0)}); } } Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { // Do not emit the builtin arguments in the arguments of a function call, // because the evaluation order of function arguments is not specified in C++. // This is important when testing to ensure the arguments are emitted in the // same order every time. Eg: // Instead of: // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)), // EmitScalarExpr(E->getArg(1)), "swdiv"); // Use: // Value *Op0 = EmitScalarExpr(E->getArg(0)); // Value *Op1 = EmitScalarExpr(E->getArg(1)); // return Builder.CreateFDiv(Op0, Op1, "swdiv") Intrinsic::ID ID = Intrinsic::not_intrinsic; #include "llvm/TargetParser/PPCTargetParser.def" auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx, unsigned Mask, CmpInst::Predicate CompOp, unsigned OpValue) -> Value * { if (SupportMethod == BUILTIN_PPC_FALSE) return llvm::ConstantInt::getFalse(ConvertType(E->getType())); if (SupportMethod == BUILTIN_PPC_TRUE) return llvm::ConstantInt::getTrue(ConvertType(E->getType())); assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod."); llvm::Value *FieldValue = nullptr; if (SupportMethod == USE_SYS_CONF) { llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE); llvm::Constant *SysConf = CGM.CreateRuntimeVariable(STy, "_system_configuration"); // Grab the appropriate field from _system_configuration. 
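      // (Illustrative note: on this USE_SYS_CONF path the field is read with
      // a GEP plus a 32-bit load from the AIX _system_configuration global,
      // selected by FieldIdx; the SYS_CALL branch below instead emits a call
      // to getsystemcfg(FieldIdx), which yields a 64-bit value.)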
llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, FieldIdx)}; FieldValue = Builder.CreateInBoundsGEP(STy, SysConf, Idxs); FieldValue = Builder.CreateAlignedLoad(Int32Ty, FieldValue, CharUnits::fromQuantity(4)); } else if (SupportMethod == SYS_CALL) { llvm::FunctionType *FTy = llvm::FunctionType::get(Int64Ty, Int32Ty, false); llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, "getsystemcfg"); FieldValue = Builder.CreateCall(Func, {ConstantInt::get(Int32Ty, FieldIdx)}); } assert(FieldValue && "SupportMethod value is not defined in PPCTargetParser.def."); if (Mask) FieldValue = Builder.CreateAnd(FieldValue, Mask); llvm::Type *ValueType = FieldValue->getType(); bool IsValueType64Bit = ValueType->isIntegerTy(64); assert( (IsValueType64Bit || ValueType->isIntegerTy(32)) && "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr()."); return Builder.CreateICmp( CompOp, FieldValue, ConstantInt::get(IsValueType64Bit ? Int64Ty : Int32Ty, OpValue)); }; switch (BuiltinID) { default: return nullptr; case Builtin::BI__builtin_cpu_is: { const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); StringRef CPUStr = cast(CPUExpr)->getString(); llvm::Triple Triple = getTarget().getTriple(); unsigned LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue; typedef std::tuple CPUInfo; std::tie(LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue) = static_cast(StringSwitch(CPUStr) #define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \ AIXID) \ .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID}) #include "llvm/TargetParser/PPCTargetParser.def" .Default({BUILTIN_PPC_UNSUPPORTED, 0, BUILTIN_PPC_UNSUPPORTED, 0})); if (Triple.isOSAIX()) { assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) && "Invalid CPU name. Missed by SemaChecking?"); return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0, ICmpInst::ICMP_EQ, AIXIDValue); } assert(Triple.isOSLinux() && "__builtin_cpu_is() is only supported for AIX and Linux."); assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) && "Invalid CPU name. 
Missed by SemaChecking?"); if (LinuxSupportMethod == BUILTIN_PPC_FALSE) return llvm::ConstantInt::getFalse(ConvertType(E->getType())); Value *Op0 = llvm::ConstantInt::get(Int32Ty, PPC_FAWORD_CPUID); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is"); return Builder.CreateICmpEQ(TheCall, llvm::ConstantInt::get(Int32Ty, LinuxIDValue)); } case Builtin::BI__builtin_cpu_supports: { llvm::Triple Triple = getTarget().getTriple(); const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); StringRef CPUStr = cast(CPUExpr)->getString(); if (Triple.isOSAIX()) { unsigned SupportMethod, FieldIdx, Mask, Value; CmpInst::Predicate CompOp; typedef std::tuple CPUSupportType; std::tie(SupportMethod, FieldIdx, Mask, CompOp, Value) = static_cast(StringSwitch(CPUStr) #define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \ VALUE) \ .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE}) #include "llvm/TargetParser/PPCTargetParser.def" .Default({BUILTIN_PPC_FALSE, 0, 0, CmpInst::Predicate(), 0})); return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp, Value); } assert(Triple.isOSLinux() && "__builtin_cpu_supports() is only supported for AIX and Linux."); unsigned FeatureWord; unsigned BitMask; std::tie(FeatureWord, BitMask) = StringSwitch>(CPUStr) #define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \ .Case(Name, {FA_WORD, Bitmask}) #include "llvm/TargetParser/PPCTargetParser.def" .Default({0, 0}); if (!BitMask) return Builder.getFalse(); Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_fixed_addr_ld); Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports"); Value *Mask = Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask)); return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty)); #undef PPC_FAWORD_HWCAP #undef PPC_FAWORD_HWCAP2 #undef PPC_FAWORD_CPUID } // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we // call __builtin_readcyclecounter. 
case PPC::BI__builtin_ppc_get_timebase: return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr case PPC::BI__builtin_altivec_lvx: case PPC::BI__builtin_altivec_lvxl: case PPC::BI__builtin_altivec_lvebx: case PPC::BI__builtin_altivec_lvehx: case PPC::BI__builtin_altivec_lvewx: case PPC::BI__builtin_altivec_lvsl: case PPC::BI__builtin_altivec_lvsr: case PPC::BI__builtin_vsx_lxvd2x: case PPC::BI__builtin_vsx_lxvw4x: case PPC::BI__builtin_vsx_lxvd2x_be: case PPC::BI__builtin_vsx_lxvw4x_be: case PPC::BI__builtin_vsx_lxvl: case PPC::BI__builtin_vsx_lxvll: { SmallVector Ops; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops.push_back(EmitScalarExpr(E->getArg(1))); if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl || BuiltinID == PPC::BI__builtin_vsx_lxvll)) { Ops[0] = Builder.CreateGEP(Int8Ty, Ops[1], Ops[0]); Ops.pop_back(); } switch (BuiltinID) { default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); case PPC::BI__builtin_altivec_lvx: ID = Intrinsic::ppc_altivec_lvx; break; case PPC::BI__builtin_altivec_lvxl: ID = Intrinsic::ppc_altivec_lvxl; break; case PPC::BI__builtin_altivec_lvebx: ID = Intrinsic::ppc_altivec_lvebx; break; case PPC::BI__builtin_altivec_lvehx: ID = Intrinsic::ppc_altivec_lvehx; break; case PPC::BI__builtin_altivec_lvewx: ID = Intrinsic::ppc_altivec_lvewx; break; case PPC::BI__builtin_altivec_lvsl: ID = Intrinsic::ppc_altivec_lvsl; break; case PPC::BI__builtin_altivec_lvsr: ID = Intrinsic::ppc_altivec_lvsr; break; case PPC::BI__builtin_vsx_lxvd2x: ID = Intrinsic::ppc_vsx_lxvd2x; break; case PPC::BI__builtin_vsx_lxvw4x: ID = Intrinsic::ppc_vsx_lxvw4x; break; case PPC::BI__builtin_vsx_lxvd2x_be: ID = Intrinsic::ppc_vsx_lxvd2x_be; break; case PPC::BI__builtin_vsx_lxvw4x_be: ID = Intrinsic::ppc_vsx_lxvw4x_be; break; case PPC::BI__builtin_vsx_lxvl: ID = Intrinsic::ppc_vsx_lxvl; break; case PPC::BI__builtin_vsx_lxvll: ID = Intrinsic::ppc_vsx_lxvll; break; } llvm::Function *F = CGM.getIntrinsic(ID); return Builder.CreateCall(F, Ops, ""); } // vec_st, vec_xst_be case PPC::BI__builtin_altivec_stvx: case PPC::BI__builtin_altivec_stvxl: case PPC::BI__builtin_altivec_stvebx: case PPC::BI__builtin_altivec_stvehx: case PPC::BI__builtin_altivec_stvewx: case PPC::BI__builtin_vsx_stxvd2x: case PPC::BI__builtin_vsx_stxvw4x: case PPC::BI__builtin_vsx_stxvd2x_be: case PPC::BI__builtin_vsx_stxvw4x_be: case PPC::BI__builtin_vsx_stxvl: case PPC::BI__builtin_vsx_stxvll: { SmallVector Ops; Ops.push_back(EmitScalarExpr(E->getArg(0))); Ops.push_back(EmitScalarExpr(E->getArg(1))); Ops.push_back(EmitScalarExpr(E->getArg(2))); if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl || BuiltinID == PPC::BI__builtin_vsx_stxvll)) { Ops[1] = Builder.CreateGEP(Int8Ty, Ops[2], Ops[1]); Ops.pop_back(); } switch (BuiltinID) { default: llvm_unreachable("Unsupported st intrinsic!"); case PPC::BI__builtin_altivec_stvx: ID = Intrinsic::ppc_altivec_stvx; break; case PPC::BI__builtin_altivec_stvxl: ID = Intrinsic::ppc_altivec_stvxl; break; case PPC::BI__builtin_altivec_stvebx: ID = Intrinsic::ppc_altivec_stvebx; break; case PPC::BI__builtin_altivec_stvehx: ID = Intrinsic::ppc_altivec_stvehx; break; case PPC::BI__builtin_altivec_stvewx: ID = Intrinsic::ppc_altivec_stvewx; break; case PPC::BI__builtin_vsx_stxvd2x: ID = Intrinsic::ppc_vsx_stxvd2x; break; case PPC::BI__builtin_vsx_stxvw4x: ID = Intrinsic::ppc_vsx_stxvw4x; break; case PPC::BI__builtin_vsx_stxvd2x_be: ID = Intrinsic::ppc_vsx_stxvd2x_be; break; case PPC::BI__builtin_vsx_stxvw4x_be: ID = 
Intrinsic::ppc_vsx_stxvw4x_be; break; case PPC::BI__builtin_vsx_stxvl: ID = Intrinsic::ppc_vsx_stxvl; break; case PPC::BI__builtin_vsx_stxvll: ID = Intrinsic::ppc_vsx_stxvll; break; } llvm::Function *F = CGM.getIntrinsic(ID); return Builder.CreateCall(F, Ops, ""); } case PPC::BI__builtin_vsx_ldrmb: { // Essentially boils down to performing an unaligned VMX load sequence so // as to avoid crossing a page boundary and then shuffling the elements // into the right side of the vector register. Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); int64_t NumBytes = cast(Op1)->getZExtValue(); llvm::Type *ResTy = ConvertType(E->getType()); bool IsLE = getTarget().isLittleEndian(); // If the user wants the entire vector, just load the entire vector. if (NumBytes == 16) { Value *LD = Builder.CreateLoad(Address(Op0, ResTy, CharUnits::fromQuantity(1))); if (!IsLE) return LD; // Reverse the bytes on LE. SmallVector RevMask; for (int Idx = 0; Idx < 16; Idx++) RevMask.push_back(15 - Idx); return Builder.CreateShuffleVector(LD, LD, RevMask); } llvm::Function *Lvx = CGM.getIntrinsic(Intrinsic::ppc_altivec_lvx); llvm::Function *Lvs = CGM.getIntrinsic(IsLE ? Intrinsic::ppc_altivec_lvsr : Intrinsic::ppc_altivec_lvsl); llvm::Function *Vperm = CGM.getIntrinsic(Intrinsic::ppc_altivec_vperm); Value *HiMem = Builder.CreateGEP( Int8Ty, Op0, ConstantInt::get(Op1->getType(), NumBytes - 1)); Value *LoLd = Builder.CreateCall(Lvx, Op0, "ld.lo"); Value *HiLd = Builder.CreateCall(Lvx, HiMem, "ld.hi"); Value *Mask1 = Builder.CreateCall(Lvs, Op0, "mask1"); Op0 = IsLE ? HiLd : LoLd; Op1 = IsLE ? LoLd : HiLd; Value *AllElts = Builder.CreateCall(Vperm, {Op0, Op1, Mask1}, "shuffle1"); Constant *Zero = llvm::Constant::getNullValue(IsLE ? ResTy : AllElts->getType()); if (IsLE) { SmallVector Consts; for (int Idx = 0; Idx < 16; Idx++) { int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1) : 16 - (NumBytes - Idx); Consts.push_back(Val); } return Builder.CreateShuffleVector(Builder.CreateBitCast(AllElts, ResTy), Zero, Consts); } SmallVector Consts; for (int Idx = 0; Idx < 16; Idx++) Consts.push_back(Builder.getInt8(NumBytes + Idx)); Value *Mask2 = ConstantVector::get(Consts); return Builder.CreateBitCast( Builder.CreateCall(Vperm, {Zero, AllElts, Mask2}, "shuffle2"), ResTy); } case PPC::BI__builtin_vsx_strmb: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); int64_t NumBytes = cast(Op1)->getZExtValue(); bool IsLE = getTarget().isLittleEndian(); auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) { // Storing the whole vector, simply store it on BE and reverse bytes and // store on LE. 
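      // (Sketch of how the callers below use this helper: a partial store
      // such as NumBytes == 7 is broken into one 4-byte, one 2-byte and one
      // 1-byte StoreSubVec call, walking RemainingBytes down to zero.)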
if (Width == 16) { Value *StVec = Op2; if (IsLE) { SmallVector RevMask; for (int Idx = 0; Idx < 16; Idx++) RevMask.push_back(15 - Idx); StVec = Builder.CreateShuffleVector(Op2, Op2, RevMask); } return Builder.CreateStore( StVec, Address(Op0, Op2->getType(), CharUnits::fromQuantity(1))); } auto *ConvTy = Int64Ty; unsigned NumElts = 0; switch (Width) { default: llvm_unreachable("width for stores must be a power of 2"); case 8: ConvTy = Int64Ty; NumElts = 2; break; case 4: ConvTy = Int32Ty; NumElts = 4; break; case 2: ConvTy = Int16Ty; NumElts = 8; break; case 1: ConvTy = Int8Ty; NumElts = 16; break; } Value *Vec = Builder.CreateBitCast( Op2, llvm::FixedVectorType::get(ConvTy, NumElts)); Value *Ptr = Builder.CreateGEP(Int8Ty, Op0, ConstantInt::get(Int64Ty, Offset)); Value *Elt = Builder.CreateExtractElement(Vec, EltNo); if (IsLE && Width > 1) { Function *F = CGM.getIntrinsic(Intrinsic::bswap, ConvTy); Elt = Builder.CreateCall(F, Elt); } return Builder.CreateStore( Elt, Address(Ptr, ConvTy, CharUnits::fromQuantity(1))); }; unsigned Stored = 0; unsigned RemainingBytes = NumBytes; Value *Result; if (NumBytes == 16) return StoreSubVec(16, 0, 0); if (NumBytes >= 8) { Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1); RemainingBytes -= 8; Stored += 8; } if (RemainingBytes >= 4) { Result = StoreSubVec(4, NumBytes - Stored - 4, IsLE ? (Stored >> 2) : 3 - (Stored >> 2)); RemainingBytes -= 4; Stored += 4; } if (RemainingBytes >= 2) { Result = StoreSubVec(2, NumBytes - Stored - 2, IsLE ? (Stored >> 1) : 7 - (Stored >> 1)); RemainingBytes -= 2; Stored += 2; } if (RemainingBytes) Result = StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored); return Result; } // Square root case PPC::BI__builtin_vsx_xvsqrtsp: case PPC::BI__builtin_vsx_xvsqrtdp: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); if (Builder.getIsFPConstrained()) { llvm::Function *F = CGM.getIntrinsic( Intrinsic::experimental_constrained_sqrt, ResultType); return Builder.CreateConstrainedFPCall(F, X); } else { llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); return Builder.CreateCall(F, X); } } // Count leading zeros case PPC::BI__builtin_altivec_vclzb: case PPC::BI__builtin_altivec_vclzh: case PPC::BI__builtin_altivec_vclzw: case PPC::BI__builtin_altivec_vclzd: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); return Builder.CreateCall(F, {X, Undef}); } case PPC::BI__builtin_altivec_vctzb: case PPC::BI__builtin_altivec_vctzh: case PPC::BI__builtin_altivec_vctzw: case PPC::BI__builtin_altivec_vctzd: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); return Builder.CreateCall(F, {X, Undef}); } case PPC::BI__builtin_altivec_vinsd: case PPC::BI__builtin_altivec_vinsw: case PPC::BI__builtin_altivec_vinsd_elt: case PPC::BI__builtin_altivec_vinsw_elt: { llvm::Type *ResultType = ConvertType(E->getType()); Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw || BuiltinID == PPC::BI__builtin_altivec_vinsd); bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw || BuiltinID == 
                   PPC::BI__builtin_altivec_vinsw_elt);

    // The third argument must be a compile time constant.
    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Op2);
    assert(ArgCI &&
           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");

    // Valid value for the third argument is dependent on the input type and
    // builtin called.
    int ValidMaxValue = 0;
    if (IsUnaligned)
      ValidMaxValue = (Is32bit) ? 12 : 8;
    else
      ValidMaxValue = (Is32bit) ? 3 : 1;

    // Get value of third argument.
    int64_t ConstArg = ArgCI->getSExtValue();

    // Compose range checking error message.
    std::string RangeErrMsg = IsUnaligned ? "byte" : "element";
    RangeErrMsg += " number " + llvm::to_string(ConstArg);
    RangeErrMsg += " is outside of the valid range [0, ";
    RangeErrMsg += llvm::to_string(ValidMaxValue) + "]";

    // Issue error if third argument is not within the valid range.
    if (ConstArg < 0 || ConstArg > ValidMaxValue)
      CGM.Error(E->getExprLoc(), RangeErrMsg);

    // Input to vec_replace_elt is an element index, convert to byte index.
    if (!IsUnaligned) {
      ConstArg *= Is32bit ? 4 : 8;
      // Fix the constant according to endianness.
      if (getTarget().isLittleEndian())
        ConstArg = (Is32bit ? 12 : 8) - ConstArg;
    }

    ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd;
    Op2 = ConstantInt::getSigned(Int32Ty, ConstArg);
    // Casting input to vector int as per intrinsic definition.
    Op0 =
        Is32bit
            ? Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4))
            : Builder.CreateBitCast(Op0,
                                    llvm::FixedVectorType::get(Int64Ty, 2));
    return Builder.CreateBitCast(
        Builder.CreateCall(CGM.getIntrinsic(ID), {Op0, Op1, Op2}), ResultType);
  }
  case PPC::BI__builtin_altivec_vpopcntb:
  case PPC::BI__builtin_altivec_vpopcnth:
  case PPC::BI__builtin_altivec_vpopcntw:
  case PPC::BI__builtin_altivec_vpopcntd: {
    llvm::Type *ResultType = ConvertType(E->getType());
    Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
    return Builder.CreateCall(F, X);
  }
  case PPC::BI__builtin_altivec_vadduqm:
  case PPC::BI__builtin_altivec_vsubuqm: {
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int128Ty, 1));
    Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int128Ty, 1));
    if (BuiltinID == PPC::BI__builtin_altivec_vadduqm)
      return Builder.CreateAdd(Op0, Op1, "vadduqm");
    else
      return Builder.CreateSub(Op0, Op1, "vsubuqm");
  }
  case PPC::BI__builtin_altivec_vaddcuq_c:
  case PPC::BI__builtin_altivec_vsubcuq_c: {
    SmallVector<Value *, 4> Ops;
    Value *Op0 = EmitScalarExpr(E->getArg(0));
    Value *Op1 = EmitScalarExpr(E->getArg(1));
    llvm::Type *V1I128Ty = llvm::FixedVectorType::get(
        llvm::IntegerType::get(getLLVMContext(), 128), 1);
    Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty));
    Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty));
    ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c) ?
Intrinsic::ppc_altivec_vaddcuq : Intrinsic::ppc_altivec_vsubcuq; return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, ""); } case PPC::BI__builtin_altivec_vaddeuqm_c: case PPC::BI__builtin_altivec_vaddecuq_c: case PPC::BI__builtin_altivec_vsubeuqm_c: case PPC::BI__builtin_altivec_vsubecuq_c: { SmallVector Ops; Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); llvm::Type *V1I128Ty = llvm::FixedVectorType::get( llvm::IntegerType::get(getLLVMContext(), 128), 1); Ops.push_back(Builder.CreateBitCast(Op0, V1I128Ty)); Ops.push_back(Builder.CreateBitCast(Op1, V1I128Ty)); Ops.push_back(Builder.CreateBitCast(Op2, V1I128Ty)); switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); case PPC::BI__builtin_altivec_vaddeuqm_c: ID = Intrinsic::ppc_altivec_vaddeuqm; break; case PPC::BI__builtin_altivec_vaddecuq_c: ID = Intrinsic::ppc_altivec_vaddecuq; break; case PPC::BI__builtin_altivec_vsubeuqm_c: ID = Intrinsic::ppc_altivec_vsubeuqm; break; case PPC::BI__builtin_altivec_vsubecuq_c: ID = Intrinsic::ppc_altivec_vsubecuq; break; } return Builder.CreateCall(CGM.getIntrinsic(ID), Ops, ""); } case PPC::BI__builtin_ppc_rldimi: case PPC::BI__builtin_ppc_rlwimi: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); Value *Op3 = EmitScalarExpr(E->getArg(3)); // rldimi is 64-bit instruction, expand the intrinsic before isel to // leverage peephole and avoid legalization efforts. if (BuiltinID == PPC::BI__builtin_ppc_rldimi && !getTarget().getTriple().isPPC64()) { Function *F = CGM.getIntrinsic(Intrinsic::fshl, Op0->getType()); Op2 = Builder.CreateZExt(Op2, Int64Ty); Value *Shift = Builder.CreateCall(F, {Op0, Op0, Op2}); return Builder.CreateOr(Builder.CreateAnd(Shift, Op3), Builder.CreateAnd(Op1, Builder.CreateNot(Op3))); } return Builder.CreateCall( CGM.getIntrinsic(BuiltinID == PPC::BI__builtin_ppc_rldimi ? 
Intrinsic::ppc_rldimi : Intrinsic::ppc_rlwimi), {Op0, Op1, Op2, Op3}); } case PPC::BI__builtin_ppc_rlwnm: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_rlwnm), {Op0, Op1, Op2}); } case PPC::BI__builtin_ppc_poppar4: case PPC::BI__builtin_ppc_poppar8: { Value *Op0 = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = Op0->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); Value *Tmp = Builder.CreateCall(F, Op0); llvm::Type *ResultType = ConvertType(E->getType()); Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, "cast"); return Result; } case PPC::BI__builtin_ppc_cmpb: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); if (getTarget().getTriple().isPPC64()) { Function *F = CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int64Ty, Int64Ty, Int64Ty}); return Builder.CreateCall(F, {Op0, Op1}, "cmpb"); } // For 32 bit, emit the code as below: // %conv = trunc i64 %a to i32 // %conv1 = trunc i64 %b to i32 // %shr = lshr i64 %a, 32 // %conv2 = trunc i64 %shr to i32 // %shr3 = lshr i64 %b, 32 // %conv4 = trunc i64 %shr3 to i32 // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1) // %conv5 = zext i32 %0 to i64 // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4) // %conv614 = zext i32 %1 to i64 // %shl = shl nuw i64 %conv614, 32 // %or = or i64 %shl, %conv5 // ret i64 %or Function *F = CGM.getIntrinsic(Intrinsic::ppc_cmpb, {Int32Ty, Int32Ty, Int32Ty}); Value *ArgOneLo = Builder.CreateTrunc(Op0, Int32Ty); Value *ArgTwoLo = Builder.CreateTrunc(Op1, Int32Ty); Constant *ShiftAmt = ConstantInt::get(Int64Ty, 32); Value *ArgOneHi = Builder.CreateTrunc(Builder.CreateLShr(Op0, ShiftAmt), Int32Ty); Value *ArgTwoHi = Builder.CreateTrunc(Builder.CreateLShr(Op1, ShiftAmt), Int32Ty); Value *ResLo = Builder.CreateZExt( Builder.CreateCall(F, {ArgOneLo, ArgTwoLo}, "cmpb"), Int64Ty); Value *ResHiShift = Builder.CreateZExt( Builder.CreateCall(F, {ArgOneHi, ArgTwoHi}, "cmpb"), Int64Ty); Value *ResHi = Builder.CreateShl(ResHiShift, ShiftAmt); return Builder.CreateOr(ResLo, ResHi); } // Copy sign case PPC::BI__builtin_vsx_xvcpsgnsp: case PPC::BI__builtin_vsx_xvcpsgndp: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); ID = Intrinsic::copysign; llvm::Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, {X, Y}); } // Rounding/truncation case PPC::BI__builtin_vsx_xvrspip: case PPC::BI__builtin_vsx_xvrdpip: case PPC::BI__builtin_vsx_xvrdpim: case PPC::BI__builtin_vsx_xvrspim: case PPC::BI__builtin_vsx_xvrdpi: case PPC::BI__builtin_vsx_xvrspi: case PPC::BI__builtin_vsx_xvrdpic: case PPC::BI__builtin_vsx_xvrspic: case PPC::BI__builtin_vsx_xvrdpiz: case PPC::BI__builtin_vsx_xvrspiz: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || BuiltinID == PPC::BI__builtin_vsx_xvrspim) ID = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_floor : Intrinsic::floor; else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || BuiltinID == PPC::BI__builtin_vsx_xvrspi) ID = Builder.getIsFPConstrained() ? 
Intrinsic::experimental_constrained_round : Intrinsic::round; else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || BuiltinID == PPC::BI__builtin_vsx_xvrspic) ID = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_rint : Intrinsic::rint; else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || BuiltinID == PPC::BI__builtin_vsx_xvrspip) ID = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_ceil : Intrinsic::ceil; else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || BuiltinID == PPC::BI__builtin_vsx_xvrspiz) ID = Builder.getIsFPConstrained() ? Intrinsic::experimental_constrained_trunc : Intrinsic::trunc; llvm::Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(F, X) : Builder.CreateCall(F, X); } // Absolute value case PPC::BI__builtin_vsx_xvabsdp: case PPC::BI__builtin_vsx_xvabssp: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); return Builder.CreateCall(F, X); } // Fastmath by default case PPC::BI__builtin_ppc_recipdivf: case PPC::BI__builtin_ppc_recipdivd: case PPC::BI__builtin_ppc_rsqrtf: case PPC::BI__builtin_ppc_rsqrtd: { FastMathFlags FMF = Builder.getFastMathFlags(); Builder.getFastMathFlags().setFast(); llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); if (BuiltinID == PPC::BI__builtin_ppc_recipdivf || BuiltinID == PPC::BI__builtin_ppc_recipdivd) { Value *Y = EmitScalarExpr(E->getArg(1)); Value *FDiv = Builder.CreateFDiv(X, Y, "recipdiv"); Builder.getFastMathFlags() &= (FMF); return FDiv; } auto *One = ConstantFP::get(ResultType, 1.0); llvm::Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); Value *FDiv = Builder.CreateFDiv(One, Builder.CreateCall(F, X), "rsqrt"); Builder.getFastMathFlags() &= (FMF); return FDiv; } case PPC::BI__builtin_ppc_alignx: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); ConstantInt *AlignmentCI = cast(Op0); if (AlignmentCI->getValue().ugt(llvm::Value::MaximumAlignment)) AlignmentCI = ConstantInt::get(AlignmentCI->getIntegerType(), llvm::Value::MaximumAlignment); emitAlignmentAssumption(Op1, E->getArg(1), /*The expr loc is sufficient.*/ SourceLocation(), AlignmentCI, nullptr); return Op1; } case PPC::BI__builtin_ppc_rdlam: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); llvm::Type *Ty = Op0->getType(); Value *ShiftAmt = Builder.CreateIntCast(Op1, Ty, false); Function *F = CGM.getIntrinsic(Intrinsic::fshl, Ty); Value *Rotate = Builder.CreateCall(F, {Op0, Op0, ShiftAmt}); return Builder.CreateAnd(Rotate, Op2); } case PPC::BI__builtin_ppc_load2r: { Function *F = CGM.getIntrinsic(Intrinsic::ppc_load2r); Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *LoadIntrinsic = Builder.CreateCall(F, {Op0}); return Builder.CreateTrunc(LoadIntrinsic, Int16Ty); } // FMA variations case PPC::BI__builtin_ppc_fnmsub: case PPC::BI__builtin_ppc_fnmsubs: case PPC::BI__builtin_vsx_xvmaddadp: case PPC::BI__builtin_vsx_xvmaddasp: case PPC::BI__builtin_vsx_xvnmaddadp: case PPC::BI__builtin_vsx_xvnmaddasp: case PPC::BI__builtin_vsx_xvmsubadp: case PPC::BI__builtin_vsx_xvmsubasp: case PPC::BI__builtin_vsx_xvnmsubadp: case PPC::BI__builtin_vsx_xvnmsubasp: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = 
EmitScalarExpr(E->getArg(2)); llvm::Function *F; if (Builder.getIsFPConstrained()) F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); else F = CGM.getIntrinsic(Intrinsic::fma, ResultType); switch (BuiltinID) { case PPC::BI__builtin_vsx_xvmaddadp: case PPC::BI__builtin_vsx_xvmaddasp: if (Builder.getIsFPConstrained()) return Builder.CreateConstrainedFPCall(F, {X, Y, Z}); else return Builder.CreateCall(F, {X, Y, Z}); case PPC::BI__builtin_vsx_xvnmaddadp: case PPC::BI__builtin_vsx_xvnmaddasp: if (Builder.getIsFPConstrained()) return Builder.CreateFNeg( Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg"); else return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); case PPC::BI__builtin_vsx_xvmsubadp: case PPC::BI__builtin_vsx_xvmsubasp: if (Builder.getIsFPConstrained()) return Builder.CreateConstrainedFPCall( F, {X, Y, Builder.CreateFNeg(Z, "neg")}); else return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); case PPC::BI__builtin_ppc_fnmsub: case PPC::BI__builtin_ppc_fnmsubs: case PPC::BI__builtin_vsx_xvnmsubadp: case PPC::BI__builtin_vsx_xvnmsubasp: if (Builder.getIsFPConstrained()) return Builder.CreateFNeg( Builder.CreateConstrainedFPCall( F, {X, Y, Builder.CreateFNeg(Z, "neg")}), "neg"); else return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z}); } llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning } case PPC::BI__builtin_vsx_insertword: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw); // Third argument is a compile time constant int. It must be clamped to // to the range [0, 12]. ConstantInt *ArgCI = dyn_cast(Op2); assert(ArgCI && "Third arg to xxinsertw intrinsic must be constant integer"); const int64_t MaxIndex = 12; int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex); // The builtin semantics don't exactly match the xxinsertw instructions // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the // word from the first argument, and inserts it in the second argument. The // instruction extracts the word from its second input register and inserts // it into its first input register, so swap the first and second arguments. std::swap(Op0, Op1); // Need to cast the second argument from a vector of unsigned int to a // vector of long long. Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); if (getTarget().isLittleEndian()) { // Reverse the double words in the vector we will extract from. Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); Op0 = Builder.CreateShuffleVector(Op0, Op0, ArrayRef{1, 0}); // Reverse the index. Index = MaxIndex - Index; } // Intrinsic expects the first arg to be a vector of int. Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); Op2 = ConstantInt::getSigned(Int32Ty, Index); return Builder.CreateCall(F, {Op0, Op1, Op2}); } case PPC::BI__builtin_vsx_extractuword: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw); // Intrinsic expects the first argument to be a vector of doublewords. Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); // The second argument is a compile time constant int that needs to // be clamped to the range [0, 12]. 
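    // (Illustrative example: with Index == 0 on a little-endian target, the
    // code below rewrites the index to 12 and then swaps the two doublewords
    // of the result, so the builtin sees the same element numbering on both
    // endiannesses.)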
ConstantInt *ArgCI = dyn_cast(Op1); assert(ArgCI && "Second Arg to xxextractuw intrinsic must be a constant integer!"); const int64_t MaxIndex = 12; int64_t Index = std::clamp(ArgCI->getSExtValue(), (int64_t)0, MaxIndex); if (getTarget().isLittleEndian()) { // Reverse the index. Index = MaxIndex - Index; Op1 = ConstantInt::getSigned(Int32Ty, Index); // Emit the call, then reverse the double words of the results vector. Value *Call = Builder.CreateCall(F, {Op0, Op1}); Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ArrayRef{1, 0}); return ShuffleCall; } else { Op1 = ConstantInt::getSigned(Int32Ty, Index); return Builder.CreateCall(F, {Op0, Op1}); } } case PPC::BI__builtin_vsx_xxpermdi: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); ConstantInt *ArgCI = dyn_cast(Op2); assert(ArgCI && "Third arg must be constant integer!"); unsigned Index = ArgCI->getZExtValue(); Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int64Ty, 2)); Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int64Ty, 2)); // Account for endianness by treating this as just a shuffle. So we use the // same indices for both LE and BE in order to produce expected results in // both cases. int ElemIdx0 = (Index & 2) >> 1; int ElemIdx1 = 2 + (Index & 1); int ShuffleElts[2] = {ElemIdx0, ElemIdx1}; Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); QualType BIRetType = E->getType(); auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); } case PPC::BI__builtin_vsx_xxsldwi: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); ConstantInt *ArgCI = dyn_cast(Op2); assert(ArgCI && "Third argument must be a compile time constant"); unsigned Index = ArgCI->getZExtValue() & 0x3; Op0 = Builder.CreateBitCast(Op0, llvm::FixedVectorType::get(Int32Ty, 4)); Op1 = Builder.CreateBitCast(Op1, llvm::FixedVectorType::get(Int32Ty, 4)); // Create a shuffle mask int ElemIdx0; int ElemIdx1; int ElemIdx2; int ElemIdx3; if (getTarget().isLittleEndian()) { // Little endian element N comes from element 8+N-Index of the // concatenated wide vector (of course, using modulo arithmetic on // the total number of elements). ElemIdx0 = (8 - Index) % 8; ElemIdx1 = (9 - Index) % 8; ElemIdx2 = (10 - Index) % 8; ElemIdx3 = (11 - Index) % 8; } else { // Big endian ElemIdx = Index + N ElemIdx0 = Index; ElemIdx1 = Index + 1; ElemIdx2 = Index + 2; ElemIdx3 = Index + 3; } int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3}; Value *ShuffleCall = Builder.CreateShuffleVector(Op0, Op1, ShuffleElts); QualType BIRetType = E->getType(); auto RetTy = ConvertType(BIRetType); return Builder.CreateBitCast(ShuffleCall, RetTy); } case PPC::BI__builtin_pack_vector_int128: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); bool isLittleEndian = getTarget().isLittleEndian(); Value *PoisonValue = llvm::PoisonValue::get(llvm::FixedVectorType::get(Op0->getType(), 2)); Value *Res = Builder.CreateInsertElement( PoisonValue, Op0, (uint64_t)(isLittleEndian ? 1 : 0)); Res = Builder.CreateInsertElement(Res, Op1, (uint64_t)(isLittleEndian ? 
0 : 1)); return Builder.CreateBitCast(Res, ConvertType(E->getType())); } case PPC::BI__builtin_unpack_vector_int128: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); ConstantInt *Index = cast(Op1); Value *Unpacked = Builder.CreateBitCast( Op0, llvm::FixedVectorType::get(ConvertType(E->getType()), 2)); if (getTarget().isLittleEndian()) Index = ConstantInt::get(Index->getIntegerType(), 1 - Index->getZExtValue()); return Builder.CreateExtractElement(Unpacked, Index); } case PPC::BI__builtin_ppc_sthcx: { llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_sthcx); Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = Builder.CreateSExt(EmitScalarExpr(E->getArg(1)), Int32Ty); return Builder.CreateCall(F, {Op0, Op1}); } // The PPC MMA builtins take a pointer to a __vector_quad as an argument. // Some of the MMA instructions accumulate their result into an existing // accumulator whereas the others generate a new accumulator. So we need to // use custom code generation to expand a builtin call with a pointer to a // load (if the corresponding instruction accumulates its result) followed by // the call to the intrinsic and a store of the result. #define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \ case PPC::BI__builtin_##Name: #include "clang/Basic/BuiltinsPPC.def" { SmallVector Ops; for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) if (E->getArg(i)->getType()->isArrayType()) Ops.push_back( EmitArrayToPointerDecay(E->getArg(i)).emitRawPointer(*this)); else Ops.push_back(EmitScalarExpr(E->getArg(i))); // The first argument of these two builtins is a pointer used to store their // result. However, the llvm intrinsics return their result in multiple // return values. So, here we emit code extracting these values from the // intrinsic results and storing them using that pointer. if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc || BuiltinID == PPC::BI__builtin_vsx_disassemble_pair || BuiltinID == PPC::BI__builtin_mma_disassemble_pair) { unsigned NumVecs = 2; auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair; if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) { NumVecs = 4; Intrinsic = Intrinsic::ppc_mma_disassemble_acc; } llvm::Function *F = CGM.getIntrinsic(Intrinsic); Address Addr = EmitPointerWithAlignment(E->getArg(1)); Value *Vec = Builder.CreateLoad(Addr); Value *Call = Builder.CreateCall(F, {Vec}); llvm::Type *VTy = llvm::FixedVectorType::get(Int8Ty, 16); Value *Ptr = Ops[0]; for (unsigned i=0; i CallOps; if (Accumulate) { Address Addr = EmitPointerWithAlignment(E->getArg(0)); Value *Acc = Builder.CreateLoad(Addr); CallOps.push_back(Acc); } for (unsigned i=1; igetArg(0)); Address OldValAddr = EmitPointerWithAlignment(E->getArg(1)); Value *OldVal = Builder.CreateLoad(OldValAddr); QualType AtomicTy = E->getArg(0)->getType()->getPointeeType(); LValue LV = MakeAddrLValue(Addr, AtomicTy); Value *Op2 = EmitScalarExpr(E->getArg(2)); auto Pair = EmitAtomicCompareExchange( LV, RValue::get(OldVal), RValue::get(Op2), E->getExprLoc(), llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true); // Unlike c11's atomic_compare_exchange, according to // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp // > In either case, the contents of the memory location specified by addr // > are copied into the memory location specified by old_val_addr. // But it hasn't specified storing to OldValAddr is atomic or not and // which order to use. 
Now following XL's codegen, treat it as a normal // store. Value *LoadedVal = Pair.first.getScalarVal(); Builder.CreateStore(LoadedVal, OldValAddr); return Builder.CreateZExt(Pair.second, Builder.getInt32Ty()); } case PPC::BI__builtin_ppc_fetch_and_add: case PPC::BI__builtin_ppc_fetch_and_addlp: { return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, llvm::AtomicOrdering::Monotonic); } case PPC::BI__builtin_ppc_fetch_and_and: case PPC::BI__builtin_ppc_fetch_and_andlp: { return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, llvm::AtomicOrdering::Monotonic); } case PPC::BI__builtin_ppc_fetch_and_or: case PPC::BI__builtin_ppc_fetch_and_orlp: { return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, llvm::AtomicOrdering::Monotonic); } case PPC::BI__builtin_ppc_fetch_and_swap: case PPC::BI__builtin_ppc_fetch_and_swaplp: { return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, llvm::AtomicOrdering::Monotonic); } case PPC::BI__builtin_ppc_ldarx: case PPC::BI__builtin_ppc_lwarx: case PPC::BI__builtin_ppc_lharx: case PPC::BI__builtin_ppc_lbarx: return emitPPCLoadReserveIntrinsic(*this, BuiltinID, E); case PPC::BI__builtin_ppc_mfspr: { Value *Op0 = EmitScalarExpr(E->getArg(0)); llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32 ? Int32Ty : Int64Ty; Function *F = CGM.getIntrinsic(Intrinsic::ppc_mfspr, RetType); return Builder.CreateCall(F, {Op0}); } case PPC::BI__builtin_ppc_mtspr: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(VoidPtrTy) == 32 ? Int32Ty : Int64Ty; Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtspr, RetType); return Builder.CreateCall(F, {Op0, Op1}); } case PPC::BI__builtin_ppc_popcntb: { Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ppc_popcntb, {ArgType, ArgType}); return Builder.CreateCall(F, {ArgValue}, "popcntb"); } case PPC::BI__builtin_ppc_mtfsf: { // The builtin takes a uint32 that needs to be cast to an // f64 to be passed to the intrinsic. 
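// A minimal sketch of that conversion, written as a free-standing helper
// (hypothetical name, illustrative only -- the real lowering below uses this
// CodeGenFunction's Builder and DoubleTy directly):
//
//   // __builtin_ppc_mtfsf(mask, value): 'value' arrives as a u32 but the
//   // llvm.ppc.mtfsf intrinsic expects its second operand as an f64.
//   static llvm::Value *emitMtfsfSketch(llvm::IRBuilder<> &B, llvm::Module &M,
//                                       llvm::Value *Mask, llvm::Value *Val) {
//     llvm::Value *AsF64 = B.CreateUIToFP(Val, B.getDoubleTy());
//     llvm::Function *F =
//         llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::ppc_mtfsf);
//     return B.CreateCall(F, {Mask, AsF64});
//   }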
Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Cast = Builder.CreateUIToFP(Op1, DoubleTy); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_mtfsf); return Builder.CreateCall(F, {Op0, Cast}, ""); } case PPC::BI__builtin_ppc_swdiv_nochk: case PPC::BI__builtin_ppc_swdivs_nochk: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); FastMathFlags FMF = Builder.getFastMathFlags(); Builder.getFastMathFlags().setFast(); Value *FDiv = Builder.CreateFDiv(Op0, Op1, "swdiv_nochk"); Builder.getFastMathFlags() &= (FMF); return FDiv; } case PPC::BI__builtin_ppc_fric: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::rint, Intrinsic::experimental_constrained_rint)) .getScalarVal(); case PPC::BI__builtin_ppc_frim: case PPC::BI__builtin_ppc_frims: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::floor, Intrinsic::experimental_constrained_floor)) .getScalarVal(); case PPC::BI__builtin_ppc_frin: case PPC::BI__builtin_ppc_frins: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::round, Intrinsic::experimental_constrained_round)) .getScalarVal(); case PPC::BI__builtin_ppc_frip: case PPC::BI__builtin_ppc_frips: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::ceil, Intrinsic::experimental_constrained_ceil)) .getScalarVal(); case PPC::BI__builtin_ppc_friz: case PPC::BI__builtin_ppc_frizs: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::trunc, Intrinsic::experimental_constrained_trunc)) .getScalarVal(); case PPC::BI__builtin_ppc_fsqrt: case PPC::BI__builtin_ppc_fsqrts: return RValue::get(emitUnaryMaybeConstrainedFPBuiltin( *this, E, Intrinsic::sqrt, Intrinsic::experimental_constrained_sqrt)) .getScalarVal(); case PPC::BI__builtin_ppc_test_data_class: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::ppc_test_data_class, Op0->getType()), {Op0, Op1}, "test_data_class"); } case PPC::BI__builtin_ppc_maxfe: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); Value *Op3 = EmitScalarExpr(E->getArg(3)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfe), {Op0, Op1, Op2, Op3}); } case PPC::BI__builtin_ppc_maxfl: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); Value *Op3 = EmitScalarExpr(E->getArg(3)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfl), {Op0, Op1, Op2, Op3}); } case PPC::BI__builtin_ppc_maxfs: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); Value *Op3 = EmitScalarExpr(E->getArg(3)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_maxfs), {Op0, Op1, Op2, Op3}); } case PPC::BI__builtin_ppc_minfe: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); Value *Op3 = EmitScalarExpr(E->getArg(3)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfe), {Op0, Op1, Op2, Op3}); } case PPC::BI__builtin_ppc_minfl: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); Value *Op3 = EmitScalarExpr(E->getArg(3)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfl), 
{Op0, Op1, Op2, Op3}); } case PPC::BI__builtin_ppc_minfs: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); Value *Op2 = EmitScalarExpr(E->getArg(2)); Value *Op3 = EmitScalarExpr(E->getArg(3)); return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_minfs), {Op0, Op1, Op2, Op3}); } case PPC::BI__builtin_ppc_swdiv: case PPC::BI__builtin_ppc_swdivs: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); return Builder.CreateFDiv(Op0, Op1, "swdiv"); } case PPC::BI__builtin_ppc_set_fpscr_rn: return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_setrnd), {EmitScalarExpr(E->getArg(0))}); case PPC::BI__builtin_ppc_mffs: return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::ppc_readflm)); } } namespace { // If \p E is not null pointer, insert address space cast to match return // type of \p E if necessary. Value *EmitAMDGPUDispatchPtr(CodeGenFunction &CGF, const CallExpr *E = nullptr) { auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_dispatch_ptr); auto *Call = CGF.Builder.CreateCall(F); Call->addRetAttr( Attribute::getWithDereferenceableBytes(Call->getContext(), 64)); Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(4))); if (!E) return Call; QualType BuiltinRetType = E->getType(); auto *RetTy = cast(CGF.ConvertType(BuiltinRetType)); if (RetTy == Call->getType()) return Call; return CGF.Builder.CreateAddrSpaceCast(Call, RetTy); } Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) { auto *F = CGF.CGM.getIntrinsic(Intrinsic::amdgcn_implicitarg_ptr); auto *Call = CGF.Builder.CreateCall(F); Call->addRetAttr( Attribute::getWithDereferenceableBytes(Call->getContext(), 256)); Call->addRetAttr(Attribute::getWithAlignment(Call->getContext(), Align(8))); return Call; } // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. /// Emit code based on Code Object ABI version. /// COV_4 : Emit code to use dispatch ptr /// COV_5+ : Emit code to use implicitarg ptr /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version" /// and use its value for COV_4 or COV_5+ approach. It is used for /// compiling device libraries in an ABI-agnostic way. /// /// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by /// clang during compilation of user code. Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) { llvm::LoadInst *LD; auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion; if (Cov == CodeObjectVersionKind::COV_None) { StringRef Name = "__oclc_ABI_version"; auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name); if (!ABIVersionC) ABIVersionC = new llvm::GlobalVariable( CGF.CGM.getModule(), CGF.Int32Ty, false, llvm::GlobalValue::ExternalLinkage, nullptr, Name, nullptr, llvm::GlobalVariable::NotThreadLocal, CGF.CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant)); // This load will be eliminated by the IPSCCP because it is constant // weak_odr without externally_initialized. Either changing it to weak or // adding externally_initialized will keep the load. Value *ABIVersion = CGF.Builder.CreateAlignedLoad(CGF.Int32Ty, ABIVersionC, CGF.CGM.getIntAlign()); Value *IsCOV5 = CGF.Builder.CreateICmpSGE( ABIVersion, llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5)); // Indexing the implicit kernarg segment. Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32( CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2); // Indexing the HSA kernel_dispatch_packet struct. 
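// Both address computations load one i16 per dimension; they differ only in
// the base pointer and byte offset: bytes 4/6/8 of the HSA
// kernel_dispatch_packet versus bytes 12/14/16 of the implicit kernarg
// segment. A small sketch of that offset math (hypothetical helper, not used
// by the code below):
//
//   // Index: 0 = x, 1 = y, 2 = z.
//   constexpr unsigned workGroupSizeByteOffset(unsigned Index,
//                                              bool UseImplicitArgs) {
//     return (UseImplicitArgs ? 12u : 4u) + Index * 2u;
//   }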
Value *DispatchGEP = CGF.Builder.CreateConstGEP1_32( CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2); auto Result = CGF.Builder.CreateSelect(IsCOV5, ImplicitGEP, DispatchGEP); LD = CGF.Builder.CreateLoad( Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2))); } else { Value *GEP = nullptr; if (Cov >= CodeObjectVersionKind::COV_5) { // Indexing the implicit kernarg segment. GEP = CGF.Builder.CreateConstGEP1_32( CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2); } else { // Indexing the HSA kernel_dispatch_packet struct. GEP = CGF.Builder.CreateConstGEP1_32( CGF.Int8Ty, EmitAMDGPUDispatchPtr(CGF), 4 + Index * 2); } LD = CGF.Builder.CreateLoad( Address(GEP, CGF.Int16Ty, CharUnits::fromQuantity(2))); } llvm::MDBuilder MDHelper(CGF.getLLVMContext()); llvm::MDNode *RNode = MDHelper.createRange(APInt(16, 1), APInt(16, CGF.getTarget().getMaxOpenCLWorkGroupSize() + 1)); LD->setMetadata(llvm::LLVMContext::MD_range, RNode); LD->setMetadata(llvm::LLVMContext::MD_noundef, llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); LD->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); return LD; } // \p Index is 0, 1, and 2 for x, y, and z dimension, respectively. Value *EmitAMDGPUGridSize(CodeGenFunction &CGF, unsigned Index) { const unsigned XOffset = 12; auto *DP = EmitAMDGPUDispatchPtr(CGF); // Indexing the HSA kernel_dispatch_packet struct. auto *Offset = llvm::ConstantInt::get(CGF.Int32Ty, XOffset + Index * 4); auto *GEP = CGF.Builder.CreateGEP(CGF.Int8Ty, DP, Offset); auto *LD = CGF.Builder.CreateLoad( Address(GEP, CGF.Int32Ty, CharUnits::fromQuantity(4))); LD->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(CGF.getLLVMContext(), std::nullopt)); return LD; } } // namespace // For processing memory ordering and memory scope arguments of various // amdgcn builtins. // \p Order takes a C++11 comptabile memory-ordering specifier and converts // it into LLVM's memory ordering specifier using atomic C ABI, and writes // to \p AO. \p Scope takes a const char * and converts it into AMDGCN // specific SyncScopeID and writes it to \p SSID. void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, llvm::AtomicOrdering &AO, llvm::SyncScope::ID &SSID) { int ord = cast(Order)->getZExtValue(); // Map C11/C++11 memory ordering to LLVM memory ordering assert(llvm::isValidAtomicOrderingCABI(ord)); switch (static_cast(ord)) { case llvm::AtomicOrderingCABI::acquire: case llvm::AtomicOrderingCABI::consume: AO = llvm::AtomicOrdering::Acquire; break; case llvm::AtomicOrderingCABI::release: AO = llvm::AtomicOrdering::Release; break; case llvm::AtomicOrderingCABI::acq_rel: AO = llvm::AtomicOrdering::AcquireRelease; break; case llvm::AtomicOrderingCABI::seq_cst: AO = llvm::AtomicOrdering::SequentiallyConsistent; break; case llvm::AtomicOrderingCABI::relaxed: AO = llvm::AtomicOrdering::Monotonic; break; } // Some of the atomic builtins take the scope as a string name. StringRef scp; if (llvm::getConstantStringInfo(Scope, scp)) { SSID = getLLVMContext().getOrInsertSyncScopeID(scp); return; } // Older builtins had an enum argument for the memory scope. 
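// The numeric fallback below mirrors the __MEMORY_SCOPE_* macro values:
// 0 system, 1 device ("agent"), 2 workgroup, 3 wavefront, 4 single thread.
// The same lookup, sketched as a stand-alone helper (illustrative only; the
// switch that follows is the code that actually runs):
//
//   static llvm::SyncScope::ID scopeFromEnum(llvm::LLVMContext &Ctx, int S) {
//     switch (S) {
//     case 1: return Ctx.getOrInsertSyncScopeID("agent");
//     case 2: return Ctx.getOrInsertSyncScopeID("workgroup");
//     case 3: return Ctx.getOrInsertSyncScopeID("wavefront");
//     case 4: return llvm::SyncScope::SingleThread;
//     default: return llvm::SyncScope::System; // 0 and unrecognized values
//     }
//   }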
int scope = cast(Scope)->getZExtValue(); switch (scope) { case 0: // __MEMORY_SCOPE_SYSTEM SSID = llvm::SyncScope::System; break; case 1: // __MEMORY_SCOPE_DEVICE SSID = getLLVMContext().getOrInsertSyncScopeID("agent"); break; case 2: // __MEMORY_SCOPE_WRKGRP SSID = getLLVMContext().getOrInsertSyncScopeID("workgroup"); break; case 3: // __MEMORY_SCOPE_WVFRNT SSID = getLLVMContext().getOrInsertSyncScopeID("wavefront"); break; case 4: // __MEMORY_SCOPE_SINGLE SSID = llvm::SyncScope::SingleThread; break; default: SSID = llvm::SyncScope::System; break; } } llvm::Value *CodeGenFunction::EmitScalarOrConstFoldImmArg(unsigned ICEArguments, unsigned Idx, const CallExpr *E) { llvm::Value *Arg = nullptr; if ((ICEArguments & (1 << Idx)) == 0) { Arg = EmitScalarExpr(E->getArg(Idx)); } else { // If this is required to be a constant, constant fold it so that we // know that the generated intrinsic gets a ConstantInt. std::optional Result = E->getArg(Idx)->getIntegerConstantExpr(getContext()); assert(Result && "Expected argument to be a constant"); Arg = llvm::ConstantInt::get(getLLVMContext(), *Result); } return Arg; } Intrinsic::ID getDotProductIntrinsic(QualType QT, int elementCount) { if (QT->hasFloatingRepresentation()) { switch (elementCount) { case 2: return Intrinsic::dx_dot2; case 3: return Intrinsic::dx_dot3; case 4: return Intrinsic::dx_dot4; } } if (QT->hasSignedIntegerRepresentation()) return Intrinsic::dx_sdot; assert(QT->hasUnsignedIntegerRepresentation()); return Intrinsic::dx_udot; } Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { if (!getLangOpts().HLSL) return nullptr; switch (BuiltinID) { case Builtin::BI__builtin_hlsl_elementwise_all: { Value *Op0 = EmitScalarExpr(E->getArg(0)); return Builder.CreateIntrinsic( /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), CGM.getHLSLRuntime().getAllIntrinsic(), ArrayRef{Op0}, nullptr, "hlsl.all"); } case Builtin::BI__builtin_hlsl_elementwise_any: { Value *Op0 = EmitScalarExpr(E->getArg(0)); return Builder.CreateIntrinsic( /*ReturnType=*/llvm::Type::getInt1Ty(getLLVMContext()), CGM.getHLSLRuntime().getAnyIntrinsic(), ArrayRef{Op0}, nullptr, "hlsl.any"); } case Builtin::BI__builtin_hlsl_elementwise_clamp: { Value *OpX = EmitScalarExpr(E->getArg(0)); Value *OpMin = EmitScalarExpr(E->getArg(1)); Value *OpMax = EmitScalarExpr(E->getArg(2)); QualType Ty = E->getArg(0)->getType(); bool IsUnsigned = false; if (auto *VecTy = Ty->getAs()) Ty = VecTy->getElementType(); IsUnsigned = Ty->isUnsignedIntegerType(); return Builder.CreateIntrinsic( /*ReturnType=*/OpX->getType(), IsUnsigned ? 
Intrinsic::dx_uclamp : Intrinsic::dx_clamp, ArrayRef{OpX, OpMin, OpMax}, nullptr, "dx.clamp"); } case Builtin::BI__builtin_hlsl_dot: { Value *Op0 = EmitScalarExpr(E->getArg(0)); Value *Op1 = EmitScalarExpr(E->getArg(1)); llvm::Type *T0 = Op0->getType(); llvm::Type *T1 = Op1->getType(); if (!T0->isVectorTy() && !T1->isVectorTy()) { if (T0->isFloatingPointTy()) return Builder.CreateFMul(Op0, Op1, "dx.dot"); if (T0->isIntegerTy()) return Builder.CreateMul(Op0, Op1, "dx.dot"); // Bools should have been promoted llvm_unreachable( "Scalar dot product is only supported on ints and floats."); } // A VectorSplat should have happened assert(T0->isVectorTy() && T1->isVectorTy() && "Dot product of vector and scalar is not supported."); // A vector sext or sitofp should have happened assert(T0->getScalarType() == T1->getScalarType() && "Dot product of vectors need the same element types."); auto *VecTy0 = E->getArg(0)->getType()->getAs(); [[maybe_unused]] auto *VecTy1 = E->getArg(1)->getType()->getAs(); // A HLSLVectorTruncation should have happend assert(VecTy0->getNumElements() == VecTy1->getNumElements() && "Dot product requires vectors to be of the same size."); return Builder.CreateIntrinsic( /*ReturnType=*/T0->getScalarType(), getDotProductIntrinsic(E->getArg(0)->getType(), VecTy0->getNumElements()), ArrayRef{Op0, Op1}, nullptr, "dx.dot"); } break; case Builtin::BI__builtin_hlsl_lerp: { Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *S = EmitScalarExpr(E->getArg(2)); if (!E->getArg(0)->getType()->hasFloatingRepresentation()) llvm_unreachable("lerp operand must have a float representation"); return Builder.CreateIntrinsic( /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getLerpIntrinsic(), ArrayRef{X, Y, S}, nullptr, "hlsl.lerp"); } case Builtin::BI__builtin_hlsl_elementwise_frac: { Value *Op0 = EmitScalarExpr(E->getArg(0)); if (!E->getArg(0)->getType()->hasFloatingRepresentation()) llvm_unreachable("frac operand must have a float representation"); return Builder.CreateIntrinsic( /*ReturnType=*/Op0->getType(), Intrinsic::dx_frac, ArrayRef{Op0}, nullptr, "dx.frac"); } case Builtin::BI__builtin_hlsl_elementwise_isinf: { Value *Op0 = EmitScalarExpr(E->getArg(0)); llvm::Type *Xty = Op0->getType(); llvm::Type *retType = llvm::Type::getInt1Ty(this->getLLVMContext()); if (Xty->isVectorTy()) { auto *XVecTy = E->getArg(0)->getType()->getAs(); retType = llvm::VectorType::get( retType, ElementCount::getFixed(XVecTy->getNumElements())); } if (!E->getArg(0)->getType()->hasFloatingRepresentation()) llvm_unreachable("isinf operand must have a float representation"); return Builder.CreateIntrinsic(retType, Intrinsic::dx_isinf, ArrayRef{Op0}, nullptr, "dx.isinf"); } case Builtin::BI__builtin_hlsl_mad: { Value *M = EmitScalarExpr(E->getArg(0)); Value *A = EmitScalarExpr(E->getArg(1)); Value *B = EmitScalarExpr(E->getArg(2)); if (E->getArg(0)->getType()->hasFloatingRepresentation()) return Builder.CreateIntrinsic( /*ReturnType*/ M->getType(), Intrinsic::fmuladd, ArrayRef{M, A, B}, nullptr, "hlsl.fmad"); if (E->getArg(0)->getType()->hasSignedIntegerRepresentation()) { if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) return Builder.CreateIntrinsic( /*ReturnType*/ M->getType(), Intrinsic::dx_imad, ArrayRef{M, A, B}, nullptr, "dx.imad"); Value *Mul = Builder.CreateNSWMul(M, A); return Builder.CreateNSWAdd(Mul, B); } assert(E->getArg(0)->getType()->hasUnsignedIntegerRepresentation()); if (CGM.getTarget().getTriple().getArch() == llvm::Triple::dxil) return 
Builder.CreateIntrinsic( /*ReturnType=*/M->getType(), Intrinsic::dx_umad, ArrayRef{M, A, B}, nullptr, "dx.umad"); Value *Mul = Builder.CreateNUWMul(M, A); return Builder.CreateNUWAdd(Mul, B); } case Builtin::BI__builtin_hlsl_elementwise_rcp: { Value *Op0 = EmitScalarExpr(E->getArg(0)); if (!E->getArg(0)->getType()->hasFloatingRepresentation()) llvm_unreachable("rcp operand must have a float representation"); llvm::Type *Ty = Op0->getType(); llvm::Type *EltTy = Ty->getScalarType(); Constant *One = Ty->isVectorTy() ? ConstantVector::getSplat( ElementCount::getFixed( cast(Ty)->getNumElements()), ConstantFP::get(EltTy, 1.0)) : ConstantFP::get(EltTy, 1.0); return Builder.CreateFDiv(One, Op0, "hlsl.rcp"); } case Builtin::BI__builtin_hlsl_elementwise_rsqrt: { Value *Op0 = EmitScalarExpr(E->getArg(0)); if (!E->getArg(0)->getType()->hasFloatingRepresentation()) llvm_unreachable("rsqrt operand must have a float representation"); return Builder.CreateIntrinsic( /*ReturnType=*/Op0->getType(), CGM.getHLSLRuntime().getRsqrtIntrinsic(), ArrayRef{Op0}, nullptr, "hlsl.rsqrt"); } case Builtin::BI__builtin_hlsl_wave_get_lane_index: { return EmitRuntimeCall(CGM.CreateRuntimeFunction( llvm::FunctionType::get(IntTy, {}, false), "__hlsl_wave_get_lane_index", {}, false, true)); } } return nullptr; } void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, const CallExpr *E) { constexpr const char *Tag = "amdgpu-as"; LLVMContext &Ctx = Inst->getContext(); SmallVector MMRAs; for (unsigned K = 2; K < E->getNumArgs(); ++K) { llvm::Value *V = EmitScalarExpr(E->getArg(K)); StringRef AS; if (llvm::getConstantStringInfo(V, AS)) { MMRAs.push_back({Tag, AS}); // TODO: Delete the resulting unused constant? continue; } CGM.Error(E->getExprLoc(), "expected an address space name as a string literal"); } llvm::sort(MMRAs); MMRAs.erase(llvm::unique(MMRAs), MMRAs.end()); Inst->setMetadata(LLVMContext::MD_mmra, MMRAMetadata::getMD(Ctx, MMRAs)); } Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out // argument. 
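// llvm.amdgcn.div.scale returns a {scaled value, i1 flag} pair, so the
// builtin's out-parameter form becomes two extractvalues plus a store. A
// minimal sketch of that pattern (assumes an IRBuilder<> B, a struct-returning
// call Tmp, and an i32* out pointer; the real code below widens the flag to
// whatever the out pointer's element type is):
//
//   llvm::Value *Result = B.CreateExtractValue(Tmp, 0);  // scaled operand
//   llvm::Value *Flag   = B.CreateExtractValue(Tmp, 1);  // i1 condition flag
//   B.CreateStore(B.CreateZExt(Flag, B.getInt32Ty()), FlagOutPtr);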
Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); llvm::Value *X = EmitScalarExpr(E->getArg(0)); llvm::Value *Y = EmitScalarExpr(E->getArg(1)); llvm::Value *Z = EmitScalarExpr(E->getArg(2)); llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, X->getType()); llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); llvm::Type *RealFlagType = FlagOutPtr.getElementType(); llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); Builder.CreateStore(FlagExt, FlagOutPtr); return Result; } case AMDGPU::BI__builtin_amdgcn_div_fmas: case AMDGPU::BI__builtin_amdgcn_div_fmasf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, Src0->getType()); llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); } case AMDGPU::BI__builtin_amdgcn_ds_swizzle: return emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::amdgcn_ds_swizzle); case AMDGPU::BI__builtin_amdgcn_mov_dpp8: return emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::amdgcn_mov_dpp8); case AMDGPU::BI__builtin_amdgcn_mov_dpp: case AMDGPU::BI__builtin_amdgcn_update_dpp: { llvm::SmallVector Args; // Find out if any arguments are required to be integer constant // expressions. unsigned ICEArguments = 0; ASTContext::GetBuiltinTypeError Error; getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); for (unsigned I = 0; I != E->getNumArgs(); ++I) { Args.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, I, E)); } assert(Args.size() == 5 || Args.size() == 6); if (Args.size() == 5) Args.insert(Args.begin(), llvm::PoisonValue::get(Args[0]->getType())); Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType()); return Builder.CreateCall(F, Args); } case AMDGPU::BI__builtin_amdgcn_permlane16: case AMDGPU::BI__builtin_amdgcn_permlanex16: return emitBuiltinWithOneOverloadedType<6>( *this, E, BuiltinID == AMDGPU::BI__builtin_amdgcn_permlane16 ? 
Intrinsic::amdgcn_permlane16 : Intrinsic::amdgcn_permlanex16); case AMDGPU::BI__builtin_amdgcn_permlane64: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_permlane64); case AMDGPU::BI__builtin_amdgcn_readlane: return emitBuiltinWithOneOverloadedType<2>(*this, E, Intrinsic::amdgcn_readlane); case AMDGPU::BI__builtin_amdgcn_readfirstlane: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_readfirstlane); case AMDGPU::BI__builtin_amdgcn_div_fixup: case AMDGPU::BI__builtin_amdgcn_div_fixupf: case AMDGPU::BI__builtin_amdgcn_div_fixuph: return emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::amdgcn_div_fixup); case AMDGPU::BI__builtin_amdgcn_trig_preop: case AMDGPU::BI__builtin_amdgcn_trig_preopf: return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); case AMDGPU::BI__builtin_amdgcn_rcp: case AMDGPU::BI__builtin_amdgcn_rcpf: case AMDGPU::BI__builtin_amdgcn_rcph: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rcp); case AMDGPU::BI__builtin_amdgcn_sqrt: case AMDGPU::BI__builtin_amdgcn_sqrtf: case AMDGPU::BI__builtin_amdgcn_sqrth: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sqrt); case AMDGPU::BI__builtin_amdgcn_rsq: case AMDGPU::BI__builtin_amdgcn_rsqf: case AMDGPU::BI__builtin_amdgcn_rsqh: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq); case AMDGPU::BI__builtin_amdgcn_rsq_clamp: case AMDGPU::BI__builtin_amdgcn_rsq_clampf: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_rsq_clamp); case AMDGPU::BI__builtin_amdgcn_sinf: case AMDGPU::BI__builtin_amdgcn_sinh: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin); case AMDGPU::BI__builtin_amdgcn_cosf: case AMDGPU::BI__builtin_amdgcn_cosh: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos); case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: return EmitAMDGPUDispatchPtr(*this, E); case AMDGPU::BI__builtin_amdgcn_logf: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log); case AMDGPU::BI__builtin_amdgcn_exp2f: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_exp2); case AMDGPU::BI__builtin_amdgcn_log_clampf: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_log_clamp); case AMDGPU::BI__builtin_amdgcn_ldexp: case AMDGPU::BI__builtin_amdgcn_ldexpf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Src1->getType()}); return Builder.CreateCall(F, {Src0, Src1}); } case AMDGPU::BI__builtin_amdgcn_ldexph: { // The raw instruction has a different behavior for out of bounds exponent // values (implicit truncation instead of saturate to short_min/short_max). 
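// The half-precision variant still goes through the generic llvm.ldexp
// intrinsic; it simply narrows the exponent operand to i16 first. The shape of
// what gets emitted, sketched with an IRBuilder<> B and module M (illustrative
// only):
//
//   llvm::Function *Ldexp = llvm::Intrinsic::getDeclaration(
//       &M, llvm::Intrinsic::ldexp, {B.getHalfTy(), B.getInt16Ty()});
//   llvm::Value *Exp16 = B.CreateTrunc(Exp, B.getInt16Ty());
//   llvm::Value *Res = B.CreateCall(Ldexp, {Val, Exp16});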
llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ldexp, {Src0->getType(), Int16Ty}); return Builder.CreateCall(F, {Src0, Builder.CreateTrunc(Src1, Int16Ty)}); } case AMDGPU::BI__builtin_amdgcn_frexp_mant: case AMDGPU::BI__builtin_amdgcn_frexp_mantf: case AMDGPU::BI__builtin_amdgcn_frexp_manth: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_frexp_mant); case AMDGPU::BI__builtin_amdgcn_frexp_exp: case AMDGPU::BI__builtin_amdgcn_frexp_expf: { Value *Src0 = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, { Builder.getInt32Ty(), Src0->getType() }); return Builder.CreateCall(F, Src0); } case AMDGPU::BI__builtin_amdgcn_frexp_exph: { Value *Src0 = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp, { Builder.getInt16Ty(), Src0->getType() }); return Builder.CreateCall(F, Src0); } case AMDGPU::BI__builtin_amdgcn_fract: case AMDGPU::BI__builtin_amdgcn_fractf: case AMDGPU::BI__builtin_amdgcn_fracth: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_fract); case AMDGPU::BI__builtin_amdgcn_lerp: return emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::amdgcn_lerp); case AMDGPU::BI__builtin_amdgcn_ubfe: return emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::amdgcn_ubfe); case AMDGPU::BI__builtin_amdgcn_sbfe: return emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::amdgcn_sbfe); case AMDGPU::BI__builtin_amdgcn_ballot_w32: case AMDGPU::BI__builtin_amdgcn_ballot_w64: { llvm::Type *ResultType = ConvertType(E->getType()); llvm::Value *Src = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType }); return Builder.CreateCall(F, { Src }); } case AMDGPU::BI__builtin_amdgcn_uicmp: case AMDGPU::BI__builtin_amdgcn_uicmpl: case AMDGPU::BI__builtin_amdgcn_sicmp: case AMDGPU::BI__builtin_amdgcn_sicmpl: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); // FIXME-GFX10: How should 32 bit mask be handled? Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp, { Builder.getInt64Ty(), Src0->getType() }); return Builder.CreateCall(F, { Src0, Src1, Src2 }); } case AMDGPU::BI__builtin_amdgcn_fcmp: case AMDGPU::BI__builtin_amdgcn_fcmpf: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); // FIXME-GFX10: How should 32 bit mask be handled? Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp, { Builder.getInt64Ty(), Src0->getType() }); return Builder.CreateCall(F, { Src0, Src1, Src2 }); } case AMDGPU::BI__builtin_amdgcn_class: case AMDGPU::BI__builtin_amdgcn_classf: case AMDGPU::BI__builtin_amdgcn_classh: return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); case AMDGPU::BI__builtin_amdgcn_fmed3f: case AMDGPU::BI__builtin_amdgcn_fmed3h: return emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::amdgcn_fmed3); case AMDGPU::BI__builtin_amdgcn_ds_append: case AMDGPU::BI__builtin_amdgcn_ds_consume: { Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ? 
Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume; Value *Src0 = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() }); return Builder.CreateCall(F, { Src0, Builder.getFalse() }); } case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: { Intrinsic::ID IID; llvm::Type *ArgTy = llvm::Type::getDoubleTy(getLLVMContext()); switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f32: ArgTy = llvm::Type::getFloatTy(getLLVMContext()); IID = Intrinsic::amdgcn_global_atomic_fadd; break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2f16: ArgTy = llvm::FixedVectorType::get( llvm::Type::getHalfTy(getLLVMContext()), 2); IID = Intrinsic::amdgcn_global_atomic_fadd; break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_f64: IID = Intrinsic::amdgcn_global_atomic_fadd; break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmin_f64: IID = Intrinsic::amdgcn_global_atomic_fmin; break; case AMDGPU::BI__builtin_amdgcn_global_atomic_fmax_f64: IID = Intrinsic::amdgcn_global_atomic_fmax; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f64: IID = Intrinsic::amdgcn_flat_atomic_fadd; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmin_f64: IID = Intrinsic::amdgcn_flat_atomic_fmin; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fmax_f64: IID = Intrinsic::amdgcn_flat_atomic_fmax; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_f32: ArgTy = llvm::Type::getFloatTy(getLLVMContext()); IID = Intrinsic::amdgcn_flat_atomic_fadd; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2f16: ArgTy = llvm::FixedVectorType::get( llvm::Type::getHalfTy(getLLVMContext()), 2); IID = Intrinsic::amdgcn_flat_atomic_fadd; break; } llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); llvm::Function *F = CGM.getIntrinsic(IID, {ArgTy, Addr->getType(), Val->getType()}); return Builder.CreateCall(F, {Addr, Val}); } case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: { Intrinsic::ID IID; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_global_atomic_fadd_v2bf16: IID = Intrinsic::amdgcn_global_atomic_fadd_v2bf16; break; case AMDGPU::BI__builtin_amdgcn_flat_atomic_fadd_v2bf16: IID = Intrinsic::amdgcn_flat_atomic_fadd_v2bf16; break; } llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); llvm::Function *F = CGM.getIntrinsic(IID, {Addr->getType()}); return Builder.CreateCall(F, {Addr, Val}); } case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: { Intrinsic::ID IID; switch (BuiltinID) { case 
AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_i32: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b64_v2i32: IID = Intrinsic::amdgcn_global_load_tr_b64; break; case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4i16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4f16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v4bf16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8i16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8f16: case AMDGPU::BI__builtin_amdgcn_global_load_tr_b128_v8bf16: IID = Intrinsic::amdgcn_global_load_tr_b128; break; } llvm::Type *LoadTy = ConvertType(E->getType()); llvm::Value *Addr = EmitScalarExpr(E->getArg(0)); llvm::Function *F = CGM.getIntrinsic(IID, {LoadTy}); return Builder.CreateCall(F, {Addr}); } case AMDGPU::BI__builtin_amdgcn_get_fpenv: { Function *F = CGM.getIntrinsic(Intrinsic::get_fpenv, {llvm::Type::getInt64Ty(getLLVMContext())}); return Builder.CreateCall(F); } case AMDGPU::BI__builtin_amdgcn_set_fpenv: { Function *F = CGM.getIntrinsic(Intrinsic::set_fpenv, {llvm::Type::getInt64Ty(getLLVMContext())}); llvm::Value *Env = EmitScalarExpr(E->getArg(0)); return Builder.CreateCall(F, {Env}); } case AMDGPU::BI__builtin_amdgcn_read_exec: return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_lo: return EmitAMDGCNBallotForExec(*this, E, Int32Ty, Int32Ty, false); case AMDGPU::BI__builtin_amdgcn_read_exec_hi: return EmitAMDGCNBallotForExec(*this, E, Int64Ty, Int64Ty, true); case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_h: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_l: case AMDGPU::BI__builtin_amdgcn_image_bvh_intersect_ray_lh: { llvm::Value *NodePtr = EmitScalarExpr(E->getArg(0)); llvm::Value *RayExtent = EmitScalarExpr(E->getArg(1)); llvm::Value *RayOrigin = EmitScalarExpr(E->getArg(2)); llvm::Value *RayDir = EmitScalarExpr(E->getArg(3)); llvm::Value *RayInverseDir = EmitScalarExpr(E->getArg(4)); llvm::Value *TextureDescr = EmitScalarExpr(E->getArg(5)); // The builtins take these arguments as vec4 where the last element is // ignored. The intrinsic takes them as vec3. 
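// Dropping the ignored fourth lane is a plain three-element shuffle; sketched
// for one ray vector V of type <4 x float> with an IRBuilder<> B (illustrative
// only, mirrors the three shuffles emitted below):
//
//   llvm::Value *V3 =
//       B.CreateShuffleVector(V, V, llvm::ArrayRef<int>{0, 1, 2});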
RayOrigin = Builder.CreateShuffleVector(RayOrigin, RayOrigin, ArrayRef{0, 1, 2}); RayDir = Builder.CreateShuffleVector(RayDir, RayDir, ArrayRef{0, 1, 2}); RayInverseDir = Builder.CreateShuffleVector(RayInverseDir, RayInverseDir, ArrayRef{0, 1, 2}); Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_image_bvh_intersect_ray, {NodePtr->getType(), RayDir->getType()}); return Builder.CreateCall(F, {NodePtr, RayExtent, RayOrigin, RayDir, RayInverseDir, TextureDescr}); } case AMDGPU::BI__builtin_amdgcn_ds_bvh_stack_rtn: { SmallVector Args; for (int i = 0, e = E->getNumArgs(); i != e; ++i) Args.push_back(EmitScalarExpr(E->getArg(i))); Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ds_bvh_stack_rtn); Value *Call = Builder.CreateCall(F, Args); Value *Rtn = Builder.CreateExtractValue(Call, 0); Value *A = Builder.CreateExtractValue(Call, 1); llvm::Type *RetTy = ConvertType(E->getType()); Value *I0 = Builder.CreateInsertElement(PoisonValue::get(RetTy), Rtn, (uint64_t)0); return Builder.CreateInsertElement(I0, A, 1); } case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32: case 
AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64: case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64: case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64: case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64: case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64: case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: { // These operations perform a matrix multiplication and accumulation of // the form: // D = A * B + C // We need to specify one type for matrices AB and one for matrices CD. // Sparse matrix operations can have different types for A and B as well as // an additional type for sparsity index. // Destination type should be put before types used for source operands. SmallVector ArgsForMatchingMatrixTypes; // On GFX12, the intrinsics with 16-bit accumulator use a packed layout. // There is no need for the variable opsel argument, so always set it to // "false". 
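// The index lists recorded in the switch below name which call operands'
// IR types instantiate the overloaded intrinsic: the destination/accumulator
// type first, then the source matrix type, plus the sparsity-index type for
// the swmmac forms. A minimal sketch of how such a list is consumed
// (illustrative; this is the same shape as the code at the end of this case):
//
//   llvm::SmallVector<llvm::Type *> OverloadTys;
//   for (unsigned ArgIdx : ArgsForMatchingMatrixTypes)
//     OverloadTys.push_back(Args[ArgIdx]->getType());
//   llvm::Function *F = CGM.getIntrinsic(BuiltinWMMAOp, OverloadTys);
//   llvm::Value *Res = Builder.CreateCall(F, Args);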
bool AppendFalseForOpselArg = false; unsigned BuiltinWMMAOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_f16_w64_gfx12: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_f16; break; case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf16_w64_gfx12: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf16; break; case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64_gfx12: AppendFalseForOpselArg = true; [[fallthrough]]; case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_w64: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16; break; case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64_gfx12: AppendFalseForOpselArg = true; [[fallthrough]]; case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16; break; case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w32: case AMDGPU::BI__builtin_amdgcn_wmma_f16_16x16x16_f16_tied_w64: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f16_16x16x16_f16_tied; break; case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w32: case AMDGPU::BI__builtin_amdgcn_wmma_bf16_16x16x16_bf16_tied_w64: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_bf16_16x16x16_bf16_tied; break; case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu8_w64_gfx12: ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu8; break; case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x16_iu4_w64_gfx12: ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x16_iu4; break; case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_fp8_w64_gfx12: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_fp8; break; case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_fp8_bf8_w64_gfx12: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_fp8_bf8; break; case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_fp8_w64_gfx12: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB BuiltinWMMAOp = 
Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_fp8; break; case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_f32_16x16x16_bf8_bf8_w64_gfx12: ArgsForMatchingMatrixTypes = {2, 0}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_f32_16x16x16_bf8_bf8; break; case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w32_gfx12: case AMDGPU::BI__builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12: ArgsForMatchingMatrixTypes = {4, 1}; // CD, AB BuiltinWMMAOp = Intrinsic::amdgcn_wmma_i32_16x16x32_iu4; break; case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64: ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_f16; break; case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64: ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf16; break; case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64: ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f16_16x16x32_f16; break; case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64: ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_bf16_16x16x32_bf16; break; case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64: ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu8; break; case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64: ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x32_iu4; break; case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64: ArgsForMatchingMatrixTypes = {4, 1, 3, 5}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_i32_16x16x64_iu4; break; case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64: ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_fp8; break; case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64: ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_fp8_bf8; break; case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64: ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_fp8; break; case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32: case AMDGPU::BI__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64: ArgsForMatchingMatrixTypes = {2, 0, 1, 3}; // CD, A, B, Index BuiltinWMMAOp = Intrinsic::amdgcn_swmmac_f32_16x16x32_bf8_bf8; break; } SmallVector Args; for (int i = 0, e = E->getNumArgs(); i != e; ++i) Args.push_back(EmitScalarExpr(E->getArg(i))); if (AppendFalseForOpselArg) Args.push_back(Builder.getFalse()); SmallVector ArgTypes; 
for (auto ArgIdx : ArgsForMatchingMatrixTypes) ArgTypes.push_back(Args[ArgIdx]->getType()); Function *F = CGM.getIntrinsic(BuiltinWMMAOp, ArgTypes); return Builder.CreateCall(F, Args); } // amdgcn workitem case AMDGPU::BI__builtin_amdgcn_workitem_id_x: return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); case AMDGPU::BI__builtin_amdgcn_workitem_id_y: return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); case AMDGPU::BI__builtin_amdgcn_workitem_id_z: return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); // amdgcn workgroup size case AMDGPU::BI__builtin_amdgcn_workgroup_size_x: return EmitAMDGPUWorkGroupSize(*this, 0); case AMDGPU::BI__builtin_amdgcn_workgroup_size_y: return EmitAMDGPUWorkGroupSize(*this, 1); case AMDGPU::BI__builtin_amdgcn_workgroup_size_z: return EmitAMDGPUWorkGroupSize(*this, 2); // amdgcn grid size case AMDGPU::BI__builtin_amdgcn_grid_size_x: return EmitAMDGPUGridSize(*this, 0); case AMDGPU::BI__builtin_amdgcn_grid_size_y: return EmitAMDGPUGridSize(*this, 1); case AMDGPU::BI__builtin_amdgcn_grid_size_z: return EmitAMDGPUGridSize(*this, 2); // r600 intrinsics case AMDGPU::BI__builtin_r600_recipsqrt_ieee: case AMDGPU::BI__builtin_r600_recipsqrt_ieeef: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::r600_recipsqrt_ieee); case AMDGPU::BI__builtin_r600_read_tidig_x: return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); case AMDGPU::BI__builtin_r600_read_tidig_y: return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); case AMDGPU::BI__builtin_r600_read_tidig_z: return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); case AMDGPU::BI__builtin_amdgcn_alignbit: { llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); Function *F = CGM.getIntrinsic(Intrinsic::fshr, Src0->getType()); return Builder.CreateCall(F, { Src0, Src1, Src2 }); } case AMDGPU::BI__builtin_amdgcn_fence: { ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), AO, SSID); FenceInst *Fence = Builder.CreateFence(AO, SSID); if (E->getNumArgs() > 2) AddAMDGPUFenceAddressSpaceMMRA(Fence, E); return Fence; } case AMDGPU::BI__builtin_amdgcn_atomic_inc32: case AMDGPU::BI__builtin_amdgcn_atomic_inc64: case AMDGPU::BI__builtin_amdgcn_atomic_dec32: case AMDGPU::BI__builtin_amdgcn_atomic_dec64: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: case AMDGPU::BI__builtin_amdgcn_ds_faddf: case AMDGPU::BI__builtin_amdgcn_ds_fminf: case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: { llvm::AtomicRMWInst::BinOp BinOp; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_atomic_inc32: case AMDGPU::BI__builtin_amdgcn_atomic_inc64: BinOp = llvm::AtomicRMWInst::UIncWrap; break; case AMDGPU::BI__builtin_amdgcn_atomic_dec32: case AMDGPU::BI__builtin_amdgcn_atomic_dec64: BinOp = llvm::AtomicRMWInst::UDecWrap; break; case AMDGPU::BI__builtin_amdgcn_ds_faddf: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f64: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_f32: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2f16: case AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16: BinOp = llvm::AtomicRMWInst::FAdd; break; case AMDGPU::BI__builtin_amdgcn_ds_fminf: BinOp = llvm::AtomicRMWInst::FMin; break; case AMDGPU::BI__builtin_amdgcn_ds_fmaxf: BinOp = 
llvm::AtomicRMWInst::FMax; break; } Address Ptr = CheckAtomicAlignment(*this, E); Value *Val = EmitScalarExpr(E->getArg(1)); llvm::Type *OrigTy = Val->getType(); QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); bool Volatile; if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_faddf || BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fminf || BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_fmaxf) { // __builtin_amdgcn_ds_faddf/fminf/fmaxf has an explicit volatile argument Volatile = cast(EmitScalarExpr(E->getArg(4)))->getZExtValue(); } else { // Infer volatile from the passed type. Volatile = PtrTy->castAs()->getPointeeType().isVolatileQualified(); } if (E->getNumArgs() >= 4) { // Some of the builtins have explicit ordering and scope arguments. ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)), AO, SSID); } else { // The ds_atomic_fadd_* builtins do not have syncscope/order arguments. SSID = llvm::SyncScope::System; AO = AtomicOrdering::SequentiallyConsistent; // The v2bf16 builtin uses i16 instead of a natural bfloat type. if (BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_atomic_fadd_v2bf16) { llvm::Type *V2BF16Ty = FixedVectorType::get( llvm::Type::getBFloatTy(Builder.getContext()), 2); Val = Builder.CreateBitCast(Val, V2BF16Ty); } } llvm::AtomicRMWInst *RMW = Builder.CreateAtomicRMW(BinOp, Ptr, Val, AO, SSID); if (Volatile) RMW->setVolatile(true); return Builder.CreateBitCast(RMW, OrigTy); } case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtn: case AMDGPU::BI__builtin_amdgcn_s_sendmsg_rtnl: { llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *ResultType = ConvertType(E->getType()); // s_sendmsg_rtn is mangled using return type only. Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_s_sendmsg_rtn, {ResultType}); return Builder.CreateCall(F, {Arg}); } case AMDGPU::BI__builtin_amdgcn_make_buffer_rsrc: return emitBuiltinWithOneOverloadedType<4>( *this, E, Intrinsic::amdgcn_make_buffer_rsrc); case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b8: case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b16: case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b32: case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b64: case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b96: case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128: return emitBuiltinWithOneOverloadedType<5>( *this, E, Intrinsic::amdgcn_raw_ptr_buffer_store); case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8: case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16: case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32: case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64: case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96: case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: { llvm::Type *RetTy = nullptr; switch (BuiltinID) { case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8: RetTy = Int8Ty; break; case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16: RetTy = Int16Ty; break; case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32: RetTy = Int32Ty; break; case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64: RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2); break; case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96: RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3); break; case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4); break; } Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy); return Builder.CreateCall( F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), 
EmitScalarExpr(E->getArg(3))}); } default: return nullptr; } } /// Handle a SystemZ function in which the final argument is a pointer /// to an int that receives the post-instruction CC value. At the LLVM level /// this is represented as a function that returns a {result, cc} pair. static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, unsigned IntrinsicID, const CallExpr *E) { unsigned NumArgs = E->getNumArgs() - 1; SmallVector Args(NumArgs); for (unsigned I = 0; I < NumArgs; ++I) Args[I] = CGF.EmitScalarExpr(E->getArg(I)); Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); Function *F = CGF.CGM.getIntrinsic(IntrinsicID); Value *Call = CGF.Builder.CreateCall(F, Args); Value *CC = CGF.Builder.CreateExtractValue(Call, 1); CGF.Builder.CreateStore(CC, CCPtr); return CGF.Builder.CreateExtractValue(Call, 0); } Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { case SystemZ::BI__builtin_tbegin: { Value *TDB = EmitScalarExpr(E->getArg(0)); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tbegin_nofloat: { Value *TDB = EmitScalarExpr(E->getArg(0)); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tbeginc: { Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); return Builder.CreateCall(F, {TDB, Control}); } case SystemZ::BI__builtin_tabort: { Value *Data = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort); return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); } case SystemZ::BI__builtin_non_tx_store: { Value *Address = EmitScalarExpr(E->getArg(0)); Value *Data = EmitScalarExpr(E->getArg(1)); Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); return Builder.CreateCall(F, {Data, Address}); } // Vector builtins. Note that most vector builtins are mapped automatically // to target-specific LLVM intrinsics. The ones handled specially here can // be represented via standard LLVM IR, which is preferable to enable common // LLVM optimizations. 
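// For example, the element-wise population count builtins right below all
// reduce to the target-independent llvm.ctpop on the vector result type; a
// minimal sketch with an IRBuilder<> B, module M, and vector type VecTy
// (illustrative only):
//
//   llvm::Function *Ctpop =
//       llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::ctpop, {VecTy});
//   llvm::Value *Res = B.CreateCall(Ctpop, {X});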
case SystemZ::BI__builtin_s390_vpopctb: case SystemZ::BI__builtin_s390_vpopcth: case SystemZ::BI__builtin_s390_vpopctf: case SystemZ::BI__builtin_s390_vpopctg: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); return Builder.CreateCall(F, X); } case SystemZ::BI__builtin_s390_vclzb: case SystemZ::BI__builtin_s390_vclzh: case SystemZ::BI__builtin_s390_vclzf: case SystemZ::BI__builtin_s390_vclzg: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); return Builder.CreateCall(F, {X, Undef}); } case SystemZ::BI__builtin_s390_vctzb: case SystemZ::BI__builtin_s390_vctzh: case SystemZ::BI__builtin_s390_vctzf: case SystemZ::BI__builtin_s390_vctzg: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); return Builder.CreateCall(F, {X, Undef}); } case SystemZ::BI__builtin_s390_verllb: case SystemZ::BI__builtin_s390_verllh: case SystemZ::BI__builtin_s390_verllf: case SystemZ::BI__builtin_s390_verllg: { llvm::Type *ResultType = ConvertType(E->getType()); llvm::Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Value *Amt = EmitScalarExpr(E->getArg(1)); // Splat scalar rotate amount to vector type. unsigned NumElts = cast(ResultType)->getNumElements(); Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false); Amt = Builder.CreateVectorSplat(NumElts, Amt); Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType); return Builder.CreateCall(F, { Src, Src, Amt }); } case SystemZ::BI__builtin_s390_verllvb: case SystemZ::BI__builtin_s390_verllvh: case SystemZ::BI__builtin_s390_verllvf: case SystemZ::BI__builtin_s390_verllvg: { llvm::Type *ResultType = ConvertType(E->getType()); llvm::Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Value *Amt = EmitScalarExpr(E->getArg(1)); Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType); return Builder.CreateCall(F, { Src, Src, Amt }); } case SystemZ::BI__builtin_s390_vfsqsb: case SystemZ::BI__builtin_s390_vfsqdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); if (Builder.getIsFPConstrained()) { Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_sqrt, ResultType); return Builder.CreateConstrainedFPCall(F, { X }); } else { Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); return Builder.CreateCall(F, X); } } case SystemZ::BI__builtin_s390_vfmasb: case SystemZ::BI__builtin_s390_vfmadb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = EmitScalarExpr(E->getArg(2)); if (Builder.getIsFPConstrained()) { Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); return Builder.CreateConstrainedFPCall(F, {X, Y, Z}); } else { Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); return Builder.CreateCall(F, {X, Y, Z}); } } case SystemZ::BI__builtin_s390_vfmssb: case SystemZ::BI__builtin_s390_vfmsdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = EmitScalarExpr(E->getArg(2)); if (Builder.getIsFPConstrained()) { Function *F = 
CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); return Builder.CreateConstrainedFPCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); } else { Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); } } case SystemZ::BI__builtin_s390_vfnmasb: case SystemZ::BI__builtin_s390_vfnmadb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = EmitScalarExpr(E->getArg(2)); if (Builder.getIsFPConstrained()) { Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, Z}), "neg"); } else { Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, Z}), "neg"); } } case SystemZ::BI__builtin_s390_vfnmssb: case SystemZ::BI__builtin_s390_vfnmsdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); Value *Z = EmitScalarExpr(E->getArg(2)); if (Builder.getIsFPConstrained()) { Function *F = CGM.getIntrinsic(Intrinsic::experimental_constrained_fma, ResultType); Value *NegZ = Builder.CreateFNeg(Z, "sub"); return Builder.CreateFNeg(Builder.CreateConstrainedFPCall(F, {X, Y, NegZ})); } else { Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); Value *NegZ = Builder.CreateFNeg(Z, "neg"); return Builder.CreateFNeg(Builder.CreateCall(F, {X, Y, NegZ})); } } case SystemZ::BI__builtin_s390_vflpsb: case SystemZ::BI__builtin_s390_vflpdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); return Builder.CreateCall(F, X); } case SystemZ::BI__builtin_s390_vflnsb: case SystemZ::BI__builtin_s390_vflndb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); return Builder.CreateFNeg(Builder.CreateCall(F, X), "neg"); } case SystemZ::BI__builtin_s390_vfisb: case SystemZ::BI__builtin_s390_vfidb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); // Constant-fold the M4 and M5 mask arguments. llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext()); llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext()); // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some combinations of M4 and M5. 
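// The switch below implements the following mapping (any other (M4, M5)
// combination falls through to the target-specific s390.vfi* intrinsic
// further down; under strict FP the experimental.constrained.* counterpart
// of each intrinsic is used instead):
//   M4 = 0 (inexact allowed),    M5 = 0  ->  llvm.rint
//   M4 = 4 (inexact suppressed), M5 = 0  ->  llvm.nearbyint
//   M4 = 4,                      M5 = 1  ->  llvm.round
//   M4 = 4,                      M5 = 5  ->  llvm.trunc
//   M4 = 4,                      M5 = 6  ->  llvm.ceil
//   M4 = 4,                      M5 = 7  ->  llvm.floor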
Intrinsic::ID ID = Intrinsic::not_intrinsic; Intrinsic::ID CI; switch (M4.getZExtValue()) { default: break; case 0: // IEEE-inexact exception allowed switch (M5.getZExtValue()) { default: break; case 0: ID = Intrinsic::rint; CI = Intrinsic::experimental_constrained_rint; break; } break; case 4: // IEEE-inexact exception suppressed switch (M5.getZExtValue()) { default: break; case 0: ID = Intrinsic::nearbyint; CI = Intrinsic::experimental_constrained_nearbyint; break; case 1: ID = Intrinsic::round; CI = Intrinsic::experimental_constrained_round; break; case 5: ID = Intrinsic::trunc; CI = Intrinsic::experimental_constrained_trunc; break; case 6: ID = Intrinsic::ceil; CI = Intrinsic::experimental_constrained_ceil; break; case 7: ID = Intrinsic::floor; CI = Intrinsic::experimental_constrained_floor; break; } break; } if (ID != Intrinsic::not_intrinsic) { if (Builder.getIsFPConstrained()) { Function *F = CGM.getIntrinsic(CI, ResultType); return Builder.CreateConstrainedFPCall(F, X); } else { Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, X); } } switch (BuiltinID) { // FIXME: constrained version? case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break; case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break; default: llvm_unreachable("Unknown BuiltinID"); } Function *F = CGM.getIntrinsic(ID); Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5); return Builder.CreateCall(F, {X, M4Value, M5Value}); } case SystemZ::BI__builtin_s390_vfmaxsb: case SystemZ::BI__builtin_s390_vfmaxdb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); // Constant-fold the M4 mask argument. llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some values of M4. Intrinsic::ID ID = Intrinsic::not_intrinsic; Intrinsic::ID CI; switch (M4.getZExtValue()) { default: break; case 4: ID = Intrinsic::maxnum; CI = Intrinsic::experimental_constrained_maxnum; break; } if (ID != Intrinsic::not_intrinsic) { if (Builder.getIsFPConstrained()) { Function *F = CGM.getIntrinsic(CI, ResultType); return Builder.CreateConstrainedFPCall(F, {X, Y}); } else { Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, {X, Y}); } } switch (BuiltinID) { case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break; case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break; default: llvm_unreachable("Unknown BuiltinID"); } Function *F = CGM.getIntrinsic(ID); Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); return Builder.CreateCall(F, {X, Y, M4Value}); } case SystemZ::BI__builtin_s390_vfminsb: case SystemZ::BI__builtin_s390_vfmindb: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); // Constant-fold the M4 mask argument. llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some values of M4. 
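// As with vfmax above, only M4 == 4 maps onto a generic intrinsic here
// (llvm.minnum, or its experimental.constrained form); every other mask
// value falls back to the target-specific s390.vfmin* intrinsic below.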
Intrinsic::ID ID = Intrinsic::not_intrinsic; Intrinsic::ID CI; switch (M4.getZExtValue()) { default: break; case 4: ID = Intrinsic::minnum; CI = Intrinsic::experimental_constrained_minnum; break; } if (ID != Intrinsic::not_intrinsic) { if (Builder.getIsFPConstrained()) { Function *F = CGM.getIntrinsic(CI, ResultType); return Builder.CreateConstrainedFPCall(F, {X, Y}); } else { Function *F = CGM.getIntrinsic(ID, ResultType); return Builder.CreateCall(F, {X, Y}); } } switch (BuiltinID) { case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break; case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break; default: llvm_unreachable("Unknown BuiltinID"); } Function *F = CGM.getIntrinsic(ID); Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4); return Builder.CreateCall(F, {X, Y, M4Value}); } case SystemZ::BI__builtin_s390_vlbrh: case SystemZ::BI__builtin_s390_vlbrf: case SystemZ::BI__builtin_s390_vlbrg: { llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType); return Builder.CreateCall(F, X); } // Vector intrinsics that output the post-instruction CC value. #define INTRINSIC_WITH_CC(NAME) \ case SystemZ::BI__builtin_##NAME: \ return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E) INTRINSIC_WITH_CC(s390_vpkshs); INTRINSIC_WITH_CC(s390_vpksfs); INTRINSIC_WITH_CC(s390_vpksgs); INTRINSIC_WITH_CC(s390_vpklshs); INTRINSIC_WITH_CC(s390_vpklsfs); INTRINSIC_WITH_CC(s390_vpklsgs); INTRINSIC_WITH_CC(s390_vceqbs); INTRINSIC_WITH_CC(s390_vceqhs); INTRINSIC_WITH_CC(s390_vceqfs); INTRINSIC_WITH_CC(s390_vceqgs); INTRINSIC_WITH_CC(s390_vchbs); INTRINSIC_WITH_CC(s390_vchhs); INTRINSIC_WITH_CC(s390_vchfs); INTRINSIC_WITH_CC(s390_vchgs); INTRINSIC_WITH_CC(s390_vchlbs); INTRINSIC_WITH_CC(s390_vchlhs); INTRINSIC_WITH_CC(s390_vchlfs); INTRINSIC_WITH_CC(s390_vchlgs); INTRINSIC_WITH_CC(s390_vfaebs); INTRINSIC_WITH_CC(s390_vfaehs); INTRINSIC_WITH_CC(s390_vfaefs); INTRINSIC_WITH_CC(s390_vfaezbs); INTRINSIC_WITH_CC(s390_vfaezhs); INTRINSIC_WITH_CC(s390_vfaezfs); INTRINSIC_WITH_CC(s390_vfeebs); INTRINSIC_WITH_CC(s390_vfeehs); INTRINSIC_WITH_CC(s390_vfeefs); INTRINSIC_WITH_CC(s390_vfeezbs); INTRINSIC_WITH_CC(s390_vfeezhs); INTRINSIC_WITH_CC(s390_vfeezfs); INTRINSIC_WITH_CC(s390_vfenebs); INTRINSIC_WITH_CC(s390_vfenehs); INTRINSIC_WITH_CC(s390_vfenefs); INTRINSIC_WITH_CC(s390_vfenezbs); INTRINSIC_WITH_CC(s390_vfenezhs); INTRINSIC_WITH_CC(s390_vfenezfs); INTRINSIC_WITH_CC(s390_vistrbs); INTRINSIC_WITH_CC(s390_vistrhs); INTRINSIC_WITH_CC(s390_vistrfs); INTRINSIC_WITH_CC(s390_vstrcbs); INTRINSIC_WITH_CC(s390_vstrchs); INTRINSIC_WITH_CC(s390_vstrcfs); INTRINSIC_WITH_CC(s390_vstrczbs); INTRINSIC_WITH_CC(s390_vstrczhs); INTRINSIC_WITH_CC(s390_vstrczfs); INTRINSIC_WITH_CC(s390_vfcesbs); INTRINSIC_WITH_CC(s390_vfcedbs); INTRINSIC_WITH_CC(s390_vfchsbs); INTRINSIC_WITH_CC(s390_vfchdbs); INTRINSIC_WITH_CC(s390_vfchesbs); INTRINSIC_WITH_CC(s390_vfchedbs); INTRINSIC_WITH_CC(s390_vftcisb); INTRINSIC_WITH_CC(s390_vftcidb); INTRINSIC_WITH_CC(s390_vstrsb); INTRINSIC_WITH_CC(s390_vstrsh); INTRINSIC_WITH_CC(s390_vstrsf); INTRINSIC_WITH_CC(s390_vstrszb); INTRINSIC_WITH_CC(s390_vstrszh); INTRINSIC_WITH_CC(s390_vstrszf); #undef INTRINSIC_WITH_CC default: return nullptr; } } namespace { // Helper classes for mapping MMA builtins to particular LLVM intrinsic variant. struct NVPTXMmaLdstInfo { unsigned NumResults; // Number of elements to load/store // Intrinsic IDs for row/col variants. 
0 if particular layout is unsupported. unsigned IID_col; unsigned IID_row; }; #define MMA_INTR(geom_op_type, layout) \ Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride #define MMA_LDST(n, geom_op_type) \ { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) } static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) { switch (BuiltinID) { // FP MMA loads case NVPTX::BI__hmma_m16n16k16_ld_a: return MMA_LDST(8, m16n16k16_load_a_f16); case NVPTX::BI__hmma_m16n16k16_ld_b: return MMA_LDST(8, m16n16k16_load_b_f16); case NVPTX::BI__hmma_m16n16k16_ld_c_f16: return MMA_LDST(4, m16n16k16_load_c_f16); case NVPTX::BI__hmma_m16n16k16_ld_c_f32: return MMA_LDST(8, m16n16k16_load_c_f32); case NVPTX::BI__hmma_m32n8k16_ld_a: return MMA_LDST(8, m32n8k16_load_a_f16); case NVPTX::BI__hmma_m32n8k16_ld_b: return MMA_LDST(8, m32n8k16_load_b_f16); case NVPTX::BI__hmma_m32n8k16_ld_c_f16: return MMA_LDST(4, m32n8k16_load_c_f16); case NVPTX::BI__hmma_m32n8k16_ld_c_f32: return MMA_LDST(8, m32n8k16_load_c_f32); case NVPTX::BI__hmma_m8n32k16_ld_a: return MMA_LDST(8, m8n32k16_load_a_f16); case NVPTX::BI__hmma_m8n32k16_ld_b: return MMA_LDST(8, m8n32k16_load_b_f16); case NVPTX::BI__hmma_m8n32k16_ld_c_f16: return MMA_LDST(4, m8n32k16_load_c_f16); case NVPTX::BI__hmma_m8n32k16_ld_c_f32: return MMA_LDST(8, m8n32k16_load_c_f32); // Integer MMA loads case NVPTX::BI__imma_m16n16k16_ld_a_s8: return MMA_LDST(2, m16n16k16_load_a_s8); case NVPTX::BI__imma_m16n16k16_ld_a_u8: return MMA_LDST(2, m16n16k16_load_a_u8); case NVPTX::BI__imma_m16n16k16_ld_b_s8: return MMA_LDST(2, m16n16k16_load_b_s8); case NVPTX::BI__imma_m16n16k16_ld_b_u8: return MMA_LDST(2, m16n16k16_load_b_u8); case NVPTX::BI__imma_m16n16k16_ld_c: return MMA_LDST(8, m16n16k16_load_c_s32); case NVPTX::BI__imma_m32n8k16_ld_a_s8: return MMA_LDST(4, m32n8k16_load_a_s8); case NVPTX::BI__imma_m32n8k16_ld_a_u8: return MMA_LDST(4, m32n8k16_load_a_u8); case NVPTX::BI__imma_m32n8k16_ld_b_s8: return MMA_LDST(1, m32n8k16_load_b_s8); case NVPTX::BI__imma_m32n8k16_ld_b_u8: return MMA_LDST(1, m32n8k16_load_b_u8); case NVPTX::BI__imma_m32n8k16_ld_c: return MMA_LDST(8, m32n8k16_load_c_s32); case NVPTX::BI__imma_m8n32k16_ld_a_s8: return MMA_LDST(1, m8n32k16_load_a_s8); case NVPTX::BI__imma_m8n32k16_ld_a_u8: return MMA_LDST(1, m8n32k16_load_a_u8); case NVPTX::BI__imma_m8n32k16_ld_b_s8: return MMA_LDST(4, m8n32k16_load_b_s8); case NVPTX::BI__imma_m8n32k16_ld_b_u8: return MMA_LDST(4, m8n32k16_load_b_u8); case NVPTX::BI__imma_m8n32k16_ld_c: return MMA_LDST(8, m8n32k16_load_c_s32); // Sub-integer MMA loads. // Only row/col layout is supported by A/B fragments. 
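// The braced initializers below follow the field order of NVPTXMmaLdstInfo,
// i.e. {NumResults, IID_col, IID_row}; a 0 in the col or row slot marks a
// layout with no corresponding intrinsic, which the caller later rejects by
// returning a null result.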
case NVPTX::BI__imma_m8n8k32_ld_a_s4: return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)}; case NVPTX::BI__imma_m8n8k32_ld_a_u4: return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)}; case NVPTX::BI__imma_m8n8k32_ld_b_s4: return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0}; case NVPTX::BI__imma_m8n8k32_ld_b_u4: return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0}; case NVPTX::BI__imma_m8n8k32_ld_c: return MMA_LDST(2, m8n8k32_load_c_s32); case NVPTX::BI__bmma_m8n8k128_ld_a_b1: return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)}; case NVPTX::BI__bmma_m8n8k128_ld_b_b1: return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0}; case NVPTX::BI__bmma_m8n8k128_ld_c: return MMA_LDST(2, m8n8k128_load_c_s32); // Double MMA loads case NVPTX::BI__dmma_m8n8k4_ld_a: return MMA_LDST(1, m8n8k4_load_a_f64); case NVPTX::BI__dmma_m8n8k4_ld_b: return MMA_LDST(1, m8n8k4_load_b_f64); case NVPTX::BI__dmma_m8n8k4_ld_c: return MMA_LDST(2, m8n8k4_load_c_f64); // Alternate float MMA loads case NVPTX::BI__mma_bf16_m16n16k16_ld_a: return MMA_LDST(4, m16n16k16_load_a_bf16); case NVPTX::BI__mma_bf16_m16n16k16_ld_b: return MMA_LDST(4, m16n16k16_load_b_bf16); case NVPTX::BI__mma_bf16_m8n32k16_ld_a: return MMA_LDST(2, m8n32k16_load_a_bf16); case NVPTX::BI__mma_bf16_m8n32k16_ld_b: return MMA_LDST(8, m8n32k16_load_b_bf16); case NVPTX::BI__mma_bf16_m32n8k16_ld_a: return MMA_LDST(8, m32n8k16_load_a_bf16); case NVPTX::BI__mma_bf16_m32n8k16_ld_b: return MMA_LDST(2, m32n8k16_load_b_bf16); case NVPTX::BI__mma_tf32_m16n16k8_ld_a: return MMA_LDST(4, m16n16k8_load_a_tf32); case NVPTX::BI__mma_tf32_m16n16k8_ld_b: return MMA_LDST(4, m16n16k8_load_b_tf32); case NVPTX::BI__mma_tf32_m16n16k8_ld_c: return MMA_LDST(8, m16n16k8_load_c_f32); // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike // PTX and LLVM IR where stores always use fragment D, NVCC builtins always // use fragment C for both loads and stores. // FP MMA stores. case NVPTX::BI__hmma_m16n16k16_st_c_f16: return MMA_LDST(4, m16n16k16_store_d_f16); case NVPTX::BI__hmma_m16n16k16_st_c_f32: return MMA_LDST(8, m16n16k16_store_d_f32); case NVPTX::BI__hmma_m32n8k16_st_c_f16: return MMA_LDST(4, m32n8k16_store_d_f16); case NVPTX::BI__hmma_m32n8k16_st_c_f32: return MMA_LDST(8, m32n8k16_store_d_f32); case NVPTX::BI__hmma_m8n32k16_st_c_f16: return MMA_LDST(4, m8n32k16_store_d_f16); case NVPTX::BI__hmma_m8n32k16_st_c_f32: return MMA_LDST(8, m8n32k16_store_d_f32); // Integer and sub-integer MMA stores. // Another naming quirk. Unlike other MMA builtins that use PTX types in the // name, integer loads/stores use LLVM's i32. case NVPTX::BI__imma_m16n16k16_st_c_i32: return MMA_LDST(8, m16n16k16_store_d_s32); case NVPTX::BI__imma_m32n8k16_st_c_i32: return MMA_LDST(8, m32n8k16_store_d_s32); case NVPTX::BI__imma_m8n32k16_st_c_i32: return MMA_LDST(8, m8n32k16_store_d_s32); case NVPTX::BI__imma_m8n8k32_st_c_i32: return MMA_LDST(2, m8n8k32_store_d_s32); case NVPTX::BI__bmma_m8n8k128_st_c_i32: return MMA_LDST(2, m8n8k128_store_d_s32); // Double MMA store case NVPTX::BI__dmma_m8n8k4_st_c_f64: return MMA_LDST(2, m8n8k4_store_d_f64); // Alternate float MMA store case NVPTX::BI__mma_m16n16k8_st_c_f32: return MMA_LDST(8, m16n16k8_store_d_f32); default: llvm_unreachable("Unknown MMA builtin"); } } #undef MMA_LDST #undef MMA_INTR struct NVPTXMmaInfo { unsigned NumEltsA; unsigned NumEltsB; unsigned NumEltsC; unsigned NumEltsD; // Variants are ordered by layout-A/layout-B/satf, where 'row' has priority // over 'col' for layout.
The index of non-satf variants is expected to match // the undocumented layout constants used by CUDA's mma.hpp. std::array Variants; unsigned getMMAIntrinsic(int Layout, bool Satf) { unsigned Index = Layout + 4 * Satf; if (Index >= Variants.size()) return 0; return Variants[Index]; } }; // Returns an intrinsic that matches Layout and Satf for valid combinations of // Layout and Satf, 0 otherwise. static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) { // clang-format off #define MMA_VARIANTS(geom, type) \ Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \ Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \ Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type #define MMA_SATF_VARIANTS(geom, type) \ MMA_VARIANTS(geom, type), \ Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \ Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \ Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite // Sub-integer MMA only supports row.col layout. #define MMA_VARIANTS_I4(geom, type) \ 0, \ Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \ 0, \ 0, \ 0, \ Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \ 0, \ 0 // b1 MMA does not support .satfinite. #define MMA_VARIANTS_B1_XOR(geom, type) \ 0, \ Intrinsic::nvvm_wmma_##geom##_mma_xor_popc_row_col_##type, \ 0, \ 0, \ 0, \ 0, \ 0, \ 0 #define MMA_VARIANTS_B1_AND(geom, type) \ 0, \ Intrinsic::nvvm_wmma_##geom##_mma_and_popc_row_col_##type, \ 0, \ 0, \ 0, \ 0, \ 0, \ 0 // clang-format on switch (BuiltinID) { // FP MMA // Note that 'type' argument of MMA_SATF_VARIANTS uses D_C notation, while // NumEltsN of return value are ordered as A,B,C,D. case NVPTX::BI__hmma_m16n16k16_mma_f16f16: return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f16)}}}; case NVPTX::BI__hmma_m16n16k16_mma_f32f16: return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f16)}}}; case NVPTX::BI__hmma_m16n16k16_mma_f16f32: return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m16n16k16, f16_f32)}}}; case NVPTX::BI__hmma_m16n16k16_mma_f32f32: return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, f32_f32)}}}; case NVPTX::BI__hmma_m32n8k16_mma_f16f16: return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f16)}}}; case NVPTX::BI__hmma_m32n8k16_mma_f32f16: return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f16)}}}; case NVPTX::BI__hmma_m32n8k16_mma_f16f32: return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m32n8k16, f16_f32)}}}; case NVPTX::BI__hmma_m32n8k16_mma_f32f32: return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, f32_f32)}}}; case NVPTX::BI__hmma_m8n32k16_mma_f16f16: return {8, 8, 4, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f16)}}}; case NVPTX::BI__hmma_m8n32k16_mma_f32f16: return {8, 8, 4, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f16)}}}; case NVPTX::BI__hmma_m8n32k16_mma_f16f32: return {8, 8, 8, 4, {{MMA_SATF_VARIANTS(m8n32k16, f16_f32)}}}; case NVPTX::BI__hmma_m8n32k16_mma_f32f32: return {8, 8, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, f32_f32)}}}; // Integer MMA case NVPTX::BI__imma_m16n16k16_mma_s8: return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, s8)}}}; case NVPTX::BI__imma_m16n16k16_mma_u8: return {2, 2, 8, 8, {{MMA_SATF_VARIANTS(m16n16k16, u8)}}}; case NVPTX::BI__imma_m32n8k16_mma_s8: return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, s8)}}}; case NVPTX::BI__imma_m32n8k16_mma_u8: return {4, 1, 8, 8, {{MMA_SATF_VARIANTS(m32n8k16, u8)}}}; case NVPTX::BI__imma_m8n32k16_mma_s8: return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, s8)}}}; case 
NVPTX::BI__imma_m8n32k16_mma_u8: return {1, 4, 8, 8, {{MMA_SATF_VARIANTS(m8n32k16, u8)}}}; // Sub-integer MMA case NVPTX::BI__imma_m8n8k32_mma_s4: return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, s4)}}}; case NVPTX::BI__imma_m8n8k32_mma_u4: return {1, 1, 2, 2, {{MMA_VARIANTS_I4(m8n8k32, u4)}}}; case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: return {1, 1, 2, 2, {{MMA_VARIANTS_B1_XOR(m8n8k128, b1)}}}; case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1: return {1, 1, 2, 2, {{MMA_VARIANTS_B1_AND(m8n8k128, b1)}}}; // Double MMA case NVPTX::BI__dmma_m8n8k4_mma_f64: return {1, 1, 2, 2, {{MMA_VARIANTS(m8n8k4, f64)}}}; // Alternate FP MMA case NVPTX::BI__mma_bf16_m16n16k16_mma_f32: return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k16, bf16)}}}; case NVPTX::BI__mma_bf16_m8n32k16_mma_f32: return {2, 8, 8, 8, {{MMA_VARIANTS(m8n32k16, bf16)}}}; case NVPTX::BI__mma_bf16_m32n8k16_mma_f32: return {8, 2, 8, 8, {{MMA_VARIANTS(m32n8k16, bf16)}}}; case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: return {4, 4, 8, 8, {{MMA_VARIANTS(m16n16k8, tf32)}}}; default: llvm_unreachable("Unexpected builtin ID."); } #undef MMA_VARIANTS #undef MMA_SATF_VARIANTS #undef MMA_VARIANTS_I4 #undef MMA_VARIANTS_B1_AND #undef MMA_VARIANTS_B1_XOR } static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF, const CallExpr *E) { Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); QualType ArgType = E->getArg(0)->getType(); clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType); llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType()); return CGF.Builder.CreateCall( CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())}); } static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF, const CallExpr *E) { Value *Ptr = CGF.EmitScalarExpr(E->getArg(0)); llvm::Type *ElemTy = CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); return CGF.Builder.CreateCall( CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}), {Ptr, CGF.EmitScalarExpr(E->getArg(1))}); } static Value *MakeCpAsync(unsigned IntrinsicID, unsigned IntrinsicIDS, CodeGenFunction &CGF, const CallExpr *E, int SrcSize) { return E->getNumArgs() == 3 ? 
CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicIDS), {CGF.EmitScalarExpr(E->getArg(0)), CGF.EmitScalarExpr(E->getArg(1)), CGF.EmitScalarExpr(E->getArg(2))}) : CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IntrinsicID), {CGF.EmitScalarExpr(E->getArg(0)), CGF.EmitScalarExpr(E->getArg(1))}); } static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID, const CallExpr *E, CodeGenFunction &CGF) { auto &C = CGF.CGM.getContext(); if (!(C.getLangOpts().NativeHalfType || !C.getTargetInfo().useFP16ConversionIntrinsics())) { CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() + " requires native half type support."); return nullptr; } if (IntrinsicID == Intrinsic::nvvm_ldg_global_f || IntrinsicID == Intrinsic::nvvm_ldu_global_f) return MakeLdgLdu(IntrinsicID, CGF, E); SmallVector Args; auto *F = CGF.CGM.getIntrinsic(IntrinsicID); auto *FTy = F->getFunctionType(); unsigned ICEArguments = 0; ASTContext::GetBuiltinTypeError Error; C.GetBuiltinType(BuiltinID, Error, &ICEArguments); assert(Error == ASTContext::GE_None && "Should not codegen an error"); for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { assert((ICEArguments & (1 << i)) == 0); auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i)); auto *PTy = FTy->getParamType(i); if (PTy != ArgValue->getType()) ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy); Args.push_back(ArgValue); } return CGF.Builder.CreateCall(F, Args); } } // namespace Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { case NVPTX::BI__nvvm_atom_add_gen_i: case NVPTX::BI__nvvm_atom_add_gen_l: case NVPTX::BI__nvvm_atom_add_gen_ll: return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); case NVPTX::BI__nvvm_atom_sub_gen_i: case NVPTX::BI__nvvm_atom_sub_gen_l: case NVPTX::BI__nvvm_atom_sub_gen_ll: return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); case NVPTX::BI__nvvm_atom_and_gen_i: case NVPTX::BI__nvvm_atom_and_gen_l: case NVPTX::BI__nvvm_atom_and_gen_ll: return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); case NVPTX::BI__nvvm_atom_or_gen_i: case NVPTX::BI__nvvm_atom_or_gen_l: case NVPTX::BI__nvvm_atom_or_gen_ll: return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); case NVPTX::BI__nvvm_atom_xor_gen_i: case NVPTX::BI__nvvm_atom_xor_gen_l: case NVPTX::BI__nvvm_atom_xor_gen_ll: return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); case NVPTX::BI__nvvm_atom_xchg_gen_i: case NVPTX::BI__nvvm_atom_xchg_gen_l: case NVPTX::BI__nvvm_atom_xchg_gen_ll: return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); case NVPTX::BI__nvvm_atom_max_gen_i: case NVPTX::BI__nvvm_atom_max_gen_l: case NVPTX::BI__nvvm_atom_max_gen_ll: return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); case NVPTX::BI__nvvm_atom_max_gen_ui: case NVPTX::BI__nvvm_atom_max_gen_ul: case NVPTX::BI__nvvm_atom_max_gen_ull: return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); case NVPTX::BI__nvvm_atom_min_gen_i: case NVPTX::BI__nvvm_atom_min_gen_l: case NVPTX::BI__nvvm_atom_min_gen_ll: return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); case NVPTX::BI__nvvm_atom_min_gen_ui: case NVPTX::BI__nvvm_atom_min_gen_ul: case NVPTX::BI__nvvm_atom_min_gen_ull: return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); case NVPTX::BI__nvvm_atom_cas_gen_i: case NVPTX::BI__nvvm_atom_cas_gen_l: case NVPTX::BI__nvvm_atom_cas_gen_ll: // __nvvm_atom_cas_gen_* should return the old value rather than the // success flag. 
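// MakeAtomicCmpXchgValue emits the cmpxchg and then extracts either the old
// value (ReturnBool == false, as used here) or the zero-extended success
// flag (ReturnBool == true); returning the old value mirrors the
// __sync_val_compare_and_swap convention.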
return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); case NVPTX::BI__nvvm_atom_add_gen_f: case NVPTX::BI__nvvm_atom_add_gen_d: { Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val, AtomicOrdering::SequentiallyConsistent); } case NVPTX::BI__nvvm_atom_inc_gen_ui: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); Function *FnALI32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); return Builder.CreateCall(FnALI32, {Ptr, Val}); } case NVPTX::BI__nvvm_atom_dec_gen_ui: { Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); Function *FnALD32 = CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); return Builder.CreateCall(FnALD32, {Ptr, Val}); } case NVPTX::BI__nvvm_ldg_c: case NVPTX::BI__nvvm_ldg_sc: case NVPTX::BI__nvvm_ldg_c2: case NVPTX::BI__nvvm_ldg_sc2: case NVPTX::BI__nvvm_ldg_c4: case NVPTX::BI__nvvm_ldg_sc4: case NVPTX::BI__nvvm_ldg_s: case NVPTX::BI__nvvm_ldg_s2: case NVPTX::BI__nvvm_ldg_s4: case NVPTX::BI__nvvm_ldg_i: case NVPTX::BI__nvvm_ldg_i2: case NVPTX::BI__nvvm_ldg_i4: case NVPTX::BI__nvvm_ldg_l: case NVPTX::BI__nvvm_ldg_l2: case NVPTX::BI__nvvm_ldg_ll: case NVPTX::BI__nvvm_ldg_ll2: case NVPTX::BI__nvvm_ldg_uc: case NVPTX::BI__nvvm_ldg_uc2: case NVPTX::BI__nvvm_ldg_uc4: case NVPTX::BI__nvvm_ldg_us: case NVPTX::BI__nvvm_ldg_us2: case NVPTX::BI__nvvm_ldg_us4: case NVPTX::BI__nvvm_ldg_ui: case NVPTX::BI__nvvm_ldg_ui2: case NVPTX::BI__nvvm_ldg_ui4: case NVPTX::BI__nvvm_ldg_ul: case NVPTX::BI__nvvm_ldg_ul2: case NVPTX::BI__nvvm_ldg_ull: case NVPTX::BI__nvvm_ldg_ull2: // PTX Interoperability section 2.2: "For a vector with an even number of // elements, its alignment is set to number of elements times the alignment // of its member: n*alignof(t)." 
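// MakeLdgLdu (defined above) computes that alignment with
// getNaturalPointeeTypeAlignment on the pointer argument and passes it as
// the trailing i32 operand of the ldg/ldu intrinsic call; for a 4-element
// int vector, for example, that works out to 16 = 4 * alignof(int),
// matching the PTX rule quoted above.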
return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E); case NVPTX::BI__nvvm_ldg_f: case NVPTX::BI__nvvm_ldg_f2: case NVPTX::BI__nvvm_ldg_f4: case NVPTX::BI__nvvm_ldg_d: case NVPTX::BI__nvvm_ldg_d2: return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E); case NVPTX::BI__nvvm_ldu_c: case NVPTX::BI__nvvm_ldu_sc: case NVPTX::BI__nvvm_ldu_c2: case NVPTX::BI__nvvm_ldu_sc2: case NVPTX::BI__nvvm_ldu_c4: case NVPTX::BI__nvvm_ldu_sc4: case NVPTX::BI__nvvm_ldu_s: case NVPTX::BI__nvvm_ldu_s2: case NVPTX::BI__nvvm_ldu_s4: case NVPTX::BI__nvvm_ldu_i: case NVPTX::BI__nvvm_ldu_i2: case NVPTX::BI__nvvm_ldu_i4: case NVPTX::BI__nvvm_ldu_l: case NVPTX::BI__nvvm_ldu_l2: case NVPTX::BI__nvvm_ldu_ll: case NVPTX::BI__nvvm_ldu_ll2: case NVPTX::BI__nvvm_ldu_uc: case NVPTX::BI__nvvm_ldu_uc2: case NVPTX::BI__nvvm_ldu_uc4: case NVPTX::BI__nvvm_ldu_us: case NVPTX::BI__nvvm_ldu_us2: case NVPTX::BI__nvvm_ldu_us4: case NVPTX::BI__nvvm_ldu_ui: case NVPTX::BI__nvvm_ldu_ui2: case NVPTX::BI__nvvm_ldu_ui4: case NVPTX::BI__nvvm_ldu_ul: case NVPTX::BI__nvvm_ldu_ul2: case NVPTX::BI__nvvm_ldu_ull: case NVPTX::BI__nvvm_ldu_ull2: return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E); case NVPTX::BI__nvvm_ldu_f: case NVPTX::BI__nvvm_ldu_f2: case NVPTX::BI__nvvm_ldu_f4: case NVPTX::BI__nvvm_ldu_d: case NVPTX::BI__nvvm_ldu_d2: return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E); case NVPTX::BI__nvvm_atom_cta_add_gen_i: case NVPTX::BI__nvvm_atom_cta_add_gen_l: case NVPTX::BI__nvvm_atom_cta_add_gen_ll: return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_add_gen_i: case NVPTX::BI__nvvm_atom_sys_add_gen_l: case NVPTX::BI__nvvm_atom_sys_add_gen_ll: return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_add_gen_f: case NVPTX::BI__nvvm_atom_cta_add_gen_d: return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_add_gen_f: case NVPTX::BI__nvvm_atom_sys_add_gen_d: return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_xchg_gen_i: case NVPTX::BI__nvvm_atom_cta_xchg_gen_l: case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll: return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_xchg_gen_i: case NVPTX::BI__nvvm_atom_sys_xchg_gen_l: case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll: return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_max_gen_i: case NVPTX::BI__nvvm_atom_cta_max_gen_ui: case NVPTX::BI__nvvm_atom_cta_max_gen_l: case NVPTX::BI__nvvm_atom_cta_max_gen_ul: case NVPTX::BI__nvvm_atom_cta_max_gen_ll: case NVPTX::BI__nvvm_atom_cta_max_gen_ull: return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_max_gen_i: case NVPTX::BI__nvvm_atom_sys_max_gen_ui: case NVPTX::BI__nvvm_atom_sys_max_gen_l: case NVPTX::BI__nvvm_atom_sys_max_gen_ul: case NVPTX::BI__nvvm_atom_sys_max_gen_ll: case NVPTX::BI__nvvm_atom_sys_max_gen_ull: return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_min_gen_i: case NVPTX::BI__nvvm_atom_cta_min_gen_ui: case NVPTX::BI__nvvm_atom_cta_min_gen_l: case NVPTX::BI__nvvm_atom_cta_min_gen_ul: case NVPTX::BI__nvvm_atom_cta_min_gen_ll: case NVPTX::BI__nvvm_atom_cta_min_gen_ull: return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_min_gen_i: case NVPTX::BI__nvvm_atom_sys_min_gen_ui: case 
NVPTX::BI__nvvm_atom_sys_min_gen_l: case NVPTX::BI__nvvm_atom_sys_min_gen_ul: case NVPTX::BI__nvvm_atom_sys_min_gen_ll: case NVPTX::BI__nvvm_atom_sys_min_gen_ull: return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_inc_gen_ui: return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_cta_dec_gen_ui: return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_inc_gen_ui: return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_sys_dec_gen_ui: return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_and_gen_i: case NVPTX::BI__nvvm_atom_cta_and_gen_l: case NVPTX::BI__nvvm_atom_cta_and_gen_ll: return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_and_gen_i: case NVPTX::BI__nvvm_atom_sys_and_gen_l: case NVPTX::BI__nvvm_atom_sys_and_gen_ll: return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_or_gen_i: case NVPTX::BI__nvvm_atom_cta_or_gen_l: case NVPTX::BI__nvvm_atom_cta_or_gen_ll: return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_or_gen_i: case NVPTX::BI__nvvm_atom_sys_or_gen_l: case NVPTX::BI__nvvm_atom_sys_or_gen_ll: return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_xor_gen_i: case NVPTX::BI__nvvm_atom_cta_xor_gen_l: case NVPTX::BI__nvvm_atom_cta_xor_gen_ll: return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E); case NVPTX::BI__nvvm_atom_sys_xor_gen_i: case NVPTX::BI__nvvm_atom_sys_xor_gen_l: case NVPTX::BI__nvvm_atom_sys_xor_gen_ll: return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E); case NVPTX::BI__nvvm_atom_cta_cas_gen_i: case NVPTX::BI__nvvm_atom_cta_cas_gen_l: case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: { Value *Ptr = EmitScalarExpr(E->getArg(0)); llvm::Type *ElemTy = ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); return Builder.CreateCall( CGM.getIntrinsic( Intrinsic::nvvm_atomic_cas_gen_i_cta, {ElemTy, Ptr->getType()}), {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); } case NVPTX::BI__nvvm_atom_sys_cas_gen_i: case NVPTX::BI__nvvm_atom_sys_cas_gen_l: case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: { Value *Ptr = EmitScalarExpr(E->getArg(0)); llvm::Type *ElemTy = ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType()); return Builder.CreateCall( CGM.getIntrinsic( Intrinsic::nvvm_atomic_cas_gen_i_sys, {ElemTy, Ptr->getType()}), {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))}); } case NVPTX::BI__nvvm_match_all_sync_i32p: case NVPTX::BI__nvvm_match_all_sync_i64p: { Value *Mask = EmitScalarExpr(E->getArg(0)); Value *Val = EmitScalarExpr(E->getArg(1)); Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2)); Value *ResultPair = Builder.CreateCall( CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p ? 
Intrinsic::nvvm_match_all_sync_i32p : Intrinsic::nvvm_match_all_sync_i64p), {Mask, Val}); Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1), PredOutPtr.getElementType()); Builder.CreateStore(Pred, PredOutPtr); return Builder.CreateExtractValue(ResultPair, 0); } // FP MMA loads case NVPTX::BI__hmma_m16n16k16_ld_a: case NVPTX::BI__hmma_m16n16k16_ld_b: case NVPTX::BI__hmma_m16n16k16_ld_c_f16: case NVPTX::BI__hmma_m16n16k16_ld_c_f32: case NVPTX::BI__hmma_m32n8k16_ld_a: case NVPTX::BI__hmma_m32n8k16_ld_b: case NVPTX::BI__hmma_m32n8k16_ld_c_f16: case NVPTX::BI__hmma_m32n8k16_ld_c_f32: case NVPTX::BI__hmma_m8n32k16_ld_a: case NVPTX::BI__hmma_m8n32k16_ld_b: case NVPTX::BI__hmma_m8n32k16_ld_c_f16: case NVPTX::BI__hmma_m8n32k16_ld_c_f32: // Integer MMA loads. case NVPTX::BI__imma_m16n16k16_ld_a_s8: case NVPTX::BI__imma_m16n16k16_ld_a_u8: case NVPTX::BI__imma_m16n16k16_ld_b_s8: case NVPTX::BI__imma_m16n16k16_ld_b_u8: case NVPTX::BI__imma_m16n16k16_ld_c: case NVPTX::BI__imma_m32n8k16_ld_a_s8: case NVPTX::BI__imma_m32n8k16_ld_a_u8: case NVPTX::BI__imma_m32n8k16_ld_b_s8: case NVPTX::BI__imma_m32n8k16_ld_b_u8: case NVPTX::BI__imma_m32n8k16_ld_c: case NVPTX::BI__imma_m8n32k16_ld_a_s8: case NVPTX::BI__imma_m8n32k16_ld_a_u8: case NVPTX::BI__imma_m8n32k16_ld_b_s8: case NVPTX::BI__imma_m8n32k16_ld_b_u8: case NVPTX::BI__imma_m8n32k16_ld_c: // Sub-integer MMA loads. case NVPTX::BI__imma_m8n8k32_ld_a_s4: case NVPTX::BI__imma_m8n8k32_ld_a_u4: case NVPTX::BI__imma_m8n8k32_ld_b_s4: case NVPTX::BI__imma_m8n8k32_ld_b_u4: case NVPTX::BI__imma_m8n8k32_ld_c: case NVPTX::BI__bmma_m8n8k128_ld_a_b1: case NVPTX::BI__bmma_m8n8k128_ld_b_b1: case NVPTX::BI__bmma_m8n8k128_ld_c: // Double MMA loads. case NVPTX::BI__dmma_m8n8k4_ld_a: case NVPTX::BI__dmma_m8n8k4_ld_b: case NVPTX::BI__dmma_m8n8k4_ld_c: // Alternate float MMA loads. case NVPTX::BI__mma_bf16_m16n16k16_ld_a: case NVPTX::BI__mma_bf16_m16n16k16_ld_b: case NVPTX::BI__mma_bf16_m8n32k16_ld_a: case NVPTX::BI__mma_bf16_m8n32k16_ld_b: case NVPTX::BI__mma_bf16_m32n8k16_ld_a: case NVPTX::BI__mma_bf16_m32n8k16_ld_b: case NVPTX::BI__mma_tf32_m16n16k8_ld_a: case NVPTX::BI__mma_tf32_m16n16k8_ld_b: case NVPTX::BI__mma_tf32_m16n16k8_ld_c: { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); std::optional isColMajorArg = E->getArg(3)->getIntegerConstantExpr(getContext()); if (!isColMajorArg) return nullptr; bool isColMajor = isColMajorArg->getSExtValue(); NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); unsigned IID = isColMajor ? II.IID_col : II.IID_row; if (IID == 0) return nullptr; Value *Result = Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm}); // Save returned values. 
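// A fragment with a single result is stored to Dst directly; otherwise the
// intrinsic returns an aggregate, and each element is extracted, bitcast to
// Dst's element type, and stored into consecutive 4-byte-aligned slots,
// roughly:
//   for (unsigned i = 0; i < II.NumResults; ++i)
//     store (bitcast (extractvalue Result, i)) -> Dst + i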
assert(II.NumResults); if (II.NumResults == 1) { Builder.CreateAlignedStore(Result, Dst.emitRawPointer(*this), CharUnits::fromQuantity(4)); } else { for (unsigned i = 0; i < II.NumResults; ++i) { Builder.CreateAlignedStore( Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), Dst.getElementType()), Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); } } return Result; } case NVPTX::BI__hmma_m16n16k16_st_c_f16: case NVPTX::BI__hmma_m16n16k16_st_c_f32: case NVPTX::BI__hmma_m32n8k16_st_c_f16: case NVPTX::BI__hmma_m32n8k16_st_c_f32: case NVPTX::BI__hmma_m8n32k16_st_c_f16: case NVPTX::BI__hmma_m8n32k16_st_c_f32: case NVPTX::BI__imma_m16n16k16_st_c_i32: case NVPTX::BI__imma_m32n8k16_st_c_i32: case NVPTX::BI__imma_m8n32k16_st_c_i32: case NVPTX::BI__imma_m8n8k32_st_c_i32: case NVPTX::BI__bmma_m8n8k128_st_c_i32: case NVPTX::BI__dmma_m8n8k4_st_c_f64: case NVPTX::BI__mma_m16n16k8_st_c_f32: { Value *Dst = EmitScalarExpr(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); std::optional isColMajorArg = E->getArg(3)->getIntegerConstantExpr(getContext()); if (!isColMajorArg) return nullptr; bool isColMajor = isColMajorArg->getSExtValue(); NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); unsigned IID = isColMajor ? II.IID_col : II.IID_row; if (IID == 0) return nullptr; Function *Intrinsic = CGM.getIntrinsic(IID, Dst->getType()); llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1); SmallVector Values = {Dst}; for (unsigned i = 0; i < II.NumResults; ++i) { Value *V = Builder.CreateAlignedLoad( Src.getElementType(), Builder.CreateGEP(Src.getElementType(), Src.emitRawPointer(*this), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); Values.push_back(Builder.CreateBitCast(V, ParamType)); } Values.push_back(Ldm); Value *Result = Builder.CreateCall(Intrinsic, Values); return Result; } // BI__hmma_m16n16k16_mma_(d, a, b, c, layout, satf) --> // Intrinsic::nvvm_wmma_m16n16k16_mma_sync case NVPTX::BI__hmma_m16n16k16_mma_f16f16: case NVPTX::BI__hmma_m16n16k16_mma_f32f16: case NVPTX::BI__hmma_m16n16k16_mma_f32f32: case NVPTX::BI__hmma_m16n16k16_mma_f16f32: case NVPTX::BI__hmma_m32n8k16_mma_f16f16: case NVPTX::BI__hmma_m32n8k16_mma_f32f16: case NVPTX::BI__hmma_m32n8k16_mma_f32f32: case NVPTX::BI__hmma_m32n8k16_mma_f16f32: case NVPTX::BI__hmma_m8n32k16_mma_f16f16: case NVPTX::BI__hmma_m8n32k16_mma_f32f16: case NVPTX::BI__hmma_m8n32k16_mma_f32f32: case NVPTX::BI__hmma_m8n32k16_mma_f16f32: case NVPTX::BI__imma_m16n16k16_mma_s8: case NVPTX::BI__imma_m16n16k16_mma_u8: case NVPTX::BI__imma_m32n8k16_mma_s8: case NVPTX::BI__imma_m32n8k16_mma_u8: case NVPTX::BI__imma_m8n32k16_mma_s8: case NVPTX::BI__imma_m8n32k16_mma_u8: case NVPTX::BI__imma_m8n8k32_mma_s4: case NVPTX::BI__imma_m8n8k32_mma_u4: case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: case NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1: case NVPTX::BI__dmma_m8n8k4_mma_f64: case NVPTX::BI__mma_bf16_m16n16k16_mma_f32: case NVPTX::BI__mma_bf16_m8n32k16_mma_f32: case NVPTX::BI__mma_bf16_m32n8k16_mma_f32: case NVPTX::BI__mma_tf32_m16n16k8_mma_f32: { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Address SrcA = EmitPointerWithAlignment(E->getArg(1)); Address SrcB = EmitPointerWithAlignment(E->getArg(2)); Address SrcC = EmitPointerWithAlignment(E->getArg(3)); std::optional LayoutArg = E->getArg(4)->getIntegerConstantExpr(getContext()); if (!LayoutArg) return nullptr; int Layout = 
LayoutArg->getSExtValue(); if (Layout < 0 || Layout > 3) return nullptr; llvm::APSInt SatfArg; if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1 || BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_and_popc_b1) SatfArg = 0; // .b1 does not have satf argument. else if (std::optional OptSatfArg = E->getArg(5)->getIntegerConstantExpr(getContext())) SatfArg = *OptSatfArg; else return nullptr; bool Satf = SatfArg.getSExtValue(); NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID); unsigned IID = MI.getMMAIntrinsic(Layout, Satf); if (IID == 0) // Unsupported combination of Layout/Satf. return nullptr; SmallVector Values; Function *Intrinsic = CGM.getIntrinsic(IID); llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0); // Load A for (unsigned i = 0; i < MI.NumEltsA; ++i) { Value *V = Builder.CreateAlignedLoad( SrcA.getElementType(), Builder.CreateGEP(SrcA.getElementType(), SrcA.emitRawPointer(*this), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); Values.push_back(Builder.CreateBitCast(V, AType)); } // Load B llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA); for (unsigned i = 0; i < MI.NumEltsB; ++i) { Value *V = Builder.CreateAlignedLoad( SrcB.getElementType(), Builder.CreateGEP(SrcB.getElementType(), SrcB.emitRawPointer(*this), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); Values.push_back(Builder.CreateBitCast(V, BType)); } // Load C llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB); for (unsigned i = 0; i < MI.NumEltsC; ++i) { Value *V = Builder.CreateAlignedLoad( SrcC.getElementType(), Builder.CreateGEP(SrcC.getElementType(), SrcC.emitRawPointer(*this), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); Values.push_back(Builder.CreateBitCast(V, CType)); } Value *Result = Builder.CreateCall(Intrinsic, Values); llvm::Type *DType = Dst.getElementType(); for (unsigned i = 0; i < MI.NumEltsD; ++i) Builder.CreateAlignedStore( Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType), Builder.CreateGEP(Dst.getElementType(), Dst.emitRawPointer(*this), llvm::ConstantInt::get(IntTy, i)), CharUnits::fromQuantity(4)); return Result; } // The following builtins require half type support case NVPTX::BI__nvvm_ex2_approx_f16: return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_ex2_approx_f16x2: return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_ff2f16x2_rn: return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this); case NVPTX::BI__nvvm_ff2f16x2_rn_relu: return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this); case NVPTX::BI__nvvm_ff2f16x2_rz: return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this); case NVPTX::BI__nvvm_ff2f16x2_rz_relu: return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_f16: return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_f16x2: return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_ftz_f16: return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_ftz_f16x2: return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16: return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2: return 
MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16: return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2: return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_relu_f16: return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_relu_f16x2: return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_sat_f16: return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fma_rn_sat_f16x2: return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_f16: return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_f16x2: return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_ftz_f16: return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_ftz_f16x2: return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_ftz_nan_f16: return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2: return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16: return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2: return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16: return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2: return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_nan_f16: return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_nan_f16x2: return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16: return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2: return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_xorsign_abs_f16: return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2: return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_f16: return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_f16x2: return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_ftz_f16: return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_ftz_f16x2: return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_ftz_nan_f16: return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2: return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16: return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2: 
return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16: return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2: return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_nan_f16: return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_nan_f16x2: return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16: return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2: return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_xorsign_abs_f16: return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E, *this); case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2: return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E, *this); case NVPTX::BI__nvvm_ldg_h: return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this); case NVPTX::BI__nvvm_ldg_h2: return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this); case NVPTX::BI__nvvm_ldu_h: return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this); case NVPTX::BI__nvvm_ldu_h2: { return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this); } case NVPTX::BI__nvvm_cp_async_ca_shared_global_4: return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4, Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E, 4); case NVPTX::BI__nvvm_cp_async_ca_shared_global_8: return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_8, Intrinsic::nvvm_cp_async_ca_shared_global_8_s, *this, E, 8); case NVPTX::BI__nvvm_cp_async_ca_shared_global_16: return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_16, Intrinsic::nvvm_cp_async_ca_shared_global_16_s, *this, E, 16); case NVPTX::BI__nvvm_cp_async_cg_shared_global_16: return MakeCpAsync(Intrinsic::nvvm_cp_async_cg_shared_global_16, Intrinsic::nvvm_cp_async_cg_shared_global_16_s, *this, E, 16); case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_x: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_x)); case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_y: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_y)); case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_z: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_z)); case NVPTX::BI__nvvm_read_ptx_sreg_clusterid_w: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_clusterid_w)); case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_x: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_x)); case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_y: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_y)); case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_z: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_z)); case NVPTX::BI__nvvm_read_ptx_sreg_nclusterid_w: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_nclusterid_w)); case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_x: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_x)); case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_y: return Builder.CreateCall( 
CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_y)); case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_z: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_z)); case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctaid_w: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctaid_w)); case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_x: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_x)); case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_y: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_y)); case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_z: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_z)); case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctaid_w: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctaid_w)); case NVPTX::BI__nvvm_read_ptx_sreg_cluster_ctarank: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_ctarank)); case NVPTX::BI__nvvm_read_ptx_sreg_cluster_nctarank: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_read_ptx_sreg_cluster_nctarank)); case NVPTX::BI__nvvm_is_explicit_cluster: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_is_explicit_cluster)); case NVPTX::BI__nvvm_isspacep_shared_cluster: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_isspacep_shared_cluster), EmitScalarExpr(E->getArg(0))); case NVPTX::BI__nvvm_mapa: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_mapa), {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); case NVPTX::BI__nvvm_mapa_shared_cluster: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_mapa_shared_cluster), {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); case NVPTX::BI__nvvm_getctarank: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_getctarank), EmitScalarExpr(E->getArg(0))); case NVPTX::BI__nvvm_getctarank_shared_cluster: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_getctarank_shared_cluster), EmitScalarExpr(E->getArg(0))); case NVPTX::BI__nvvm_barrier_cluster_arrive: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive)); case NVPTX::BI__nvvm_barrier_cluster_arrive_relaxed: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive_relaxed)); case NVPTX::BI__nvvm_barrier_cluster_wait: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_barrier_cluster_wait)); case NVPTX::BI__nvvm_fence_sc_cluster: return Builder.CreateCall( CGM.getIntrinsic(Intrinsic::nvvm_fence_sc_cluster)); default: return nullptr; } } namespace { struct BuiltinAlignArgs { llvm::Value *Src = nullptr; llvm::Type *SrcType = nullptr; llvm::Value *Alignment = nullptr; llvm::Value *Mask = nullptr; llvm::IntegerType *IntType = nullptr; BuiltinAlignArgs(const CallExpr *E, CodeGenFunction &CGF) { QualType AstType = E->getArg(0)->getType(); if (AstType->isArrayType()) Src = CGF.EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(CGF); else Src = CGF.EmitScalarExpr(E->getArg(0)); SrcType = Src->getType(); if (SrcType->isPointerTy()) { IntType = IntegerType::get( CGF.getLLVMContext(), CGF.CGM.getDataLayout().getIndexTypeSizeInBits(SrcType)); } else { assert(SrcType->isIntegerTy()); IntType = cast(SrcType); } Alignment = CGF.EmitScalarExpr(E->getArg(1)); Alignment = CGF.Builder.CreateZExtOrTrunc(Alignment, IntType, "alignment"); auto *One = 
llvm::ConstantInt::get(IntType, 1); Mask = CGF.Builder.CreateSub(Alignment, One, "mask"); } }; } // namespace /// Generate (x & (y-1)) == 0. RValue CodeGenFunction::EmitBuiltinIsAligned(const CallExpr *E) { BuiltinAlignArgs Args(E, *this); llvm::Value *SrcAddress = Args.Src; if (Args.SrcType->isPointerTy()) SrcAddress = Builder.CreateBitOrPointerCast(Args.Src, Args.IntType, "src_addr"); return RValue::get(Builder.CreateICmpEQ( Builder.CreateAnd(SrcAddress, Args.Mask, "set_bits"), llvm::Constant::getNullValue(Args.IntType), "is_aligned")); } /// Generate (x & ~(y-1)) to align down or ((x+(y-1)) & ~(y-1)) to align up. /// Note: For pointer types we can avoid ptrtoint/inttoptr pairs by using the /// llvm.ptrmask intrinsic (with a GEP before in the align_up case). RValue CodeGenFunction::EmitBuiltinAlignTo(const CallExpr *E, bool AlignUp) { BuiltinAlignArgs Args(E, *this); llvm::Value *SrcForMask = Args.Src; if (AlignUp) { // When aligning up we have to first add the mask to ensure we go over the // next alignment value and then align down to the next valid multiple. // By adding the mask, we ensure that align_up on an already aligned // value will not change the value. if (Args.Src->getType()->isPointerTy()) { if (getLangOpts().isSignedOverflowDefined()) SrcForMask = Builder.CreateGEP(Int8Ty, SrcForMask, Args.Mask, "over_boundary"); else SrcForMask = EmitCheckedInBoundsGEP(Int8Ty, SrcForMask, Args.Mask, /*SignedIndices=*/true, /*isSubtraction=*/false, E->getExprLoc(), "over_boundary"); } else { SrcForMask = Builder.CreateAdd(SrcForMask, Args.Mask, "over_boundary"); } } // Invert the mask to only clear the lower bits. llvm::Value *InvertedMask = Builder.CreateNot(Args.Mask, "inverted_mask"); llvm::Value *Result = nullptr; if (Args.Src->getType()->isPointerTy()) { Result = Builder.CreateIntrinsic( Intrinsic::ptrmask, {Args.SrcType, Args.IntType}, {SrcForMask, InvertedMask}, nullptr, "aligned_result"); } else { Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result"); } assert(Result->getType() == Args.SrcType); return RValue::get(Result); } Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_memory_size: { llvm::Type *ResultType = ConvertType(E->getType()); Value *I = EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType); return Builder.CreateCall(Callee, I); } case WebAssembly::BI__builtin_wasm_memory_grow: { llvm::Type *ResultType = ConvertType(E->getType()); Value *Args[] = {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}; Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType); return Builder.CreateCall(Callee, Args); } case WebAssembly::BI__builtin_wasm_tls_size: { llvm::Type *ResultType = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_tls_align: { llvm::Type *ResultType = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_align, ResultType); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_tls_base: { Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_base); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_throw: { Value *Tag = EmitScalarExpr(E->getArg(0)); Value *Obj = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw); return 
Builder.CreateCall(Callee, {Tag, Obj}); } case WebAssembly::BI__builtin_wasm_rethrow: { Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_memory_atomic_wait32: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait32); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } case WebAssembly::BI__builtin_wasm_memory_atomic_wait64: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Expected = EmitScalarExpr(E->getArg(1)); Value *Timeout = EmitScalarExpr(E->getArg(2)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_wait64); return Builder.CreateCall(Callee, {Addr, Expected, Timeout}); } case WebAssembly::BI__builtin_wasm_memory_atomic_notify: { Value *Addr = EmitScalarExpr(E->getArg(0)); Value *Count = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_atomic_notify); return Builder.CreateCall(Callee, {Addr, Count}); } case WebAssembly::BI__builtin_wasm_trunc_s_i32_f32: case WebAssembly::BI__builtin_wasm_trunc_s_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_s_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_s_i64_f64: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_signed, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } case WebAssembly::BI__builtin_wasm_trunc_u_i32_f32: case WebAssembly::BI__builtin_wasm_trunc_u_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_u_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_u_i64_f64: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_unsigned, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64: case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); Function *Callee = CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } case WebAssembly::BI__builtin_wasm_min_f32: case WebAssembly::BI__builtin_wasm_min_f64: case WebAssembly::BI__builtin_wasm_min_f16x8: case WebAssembly::BI__builtin_wasm_min_f32x4: case WebAssembly::BI__builtin_wasm_min_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::minimum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_max_f32: 
case WebAssembly::BI__builtin_wasm_max_f64: case WebAssembly::BI__builtin_wasm_max_f16x8: case WebAssembly::BI__builtin_wasm_max_f32x4: case WebAssembly::BI__builtin_wasm_max_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::maximum, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_pmin_f16x8: case WebAssembly::BI__builtin_wasm_pmin_f32x4: case WebAssembly::BI__builtin_wasm_pmin_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_pmax_f16x8: case WebAssembly::BI__builtin_wasm_pmax_f32x4: case WebAssembly::BI__builtin_wasm_pmax_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_ceil_f32x4: case WebAssembly::BI__builtin_wasm_floor_f32x4: case WebAssembly::BI__builtin_wasm_trunc_f32x4: case WebAssembly::BI__builtin_wasm_nearest_f32x4: case WebAssembly::BI__builtin_wasm_ceil_f64x2: case WebAssembly::BI__builtin_wasm_floor_f64x2: case WebAssembly::BI__builtin_wasm_trunc_f64x2: case WebAssembly::BI__builtin_wasm_nearest_f64x2: { unsigned IntNo; switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_ceil_f32x4: case WebAssembly::BI__builtin_wasm_ceil_f64x2: IntNo = Intrinsic::ceil; break; case WebAssembly::BI__builtin_wasm_floor_f32x4: case WebAssembly::BI__builtin_wasm_floor_f64x2: IntNo = Intrinsic::floor; break; case WebAssembly::BI__builtin_wasm_trunc_f32x4: case WebAssembly::BI__builtin_wasm_trunc_f64x2: IntNo = Intrinsic::trunc; break; case WebAssembly::BI__builtin_wasm_nearest_f32x4: case WebAssembly::BI__builtin_wasm_nearest_f64x2: IntNo = Intrinsic::nearbyint; break; default: llvm_unreachable("unexpected builtin ID"); } Value *Value = EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); return Builder.CreateCall(Callee, Value); } case WebAssembly::BI__builtin_wasm_ref_null_extern: { Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_extern); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_ref_null_func: { Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_ref_null_func); return Builder.CreateCall(Callee); } case WebAssembly::BI__builtin_wasm_swizzle_i8x16: { Value *Src = EmitScalarExpr(E->getArg(0)); Value *Indices = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_swizzle); return Builder.CreateCall(Callee, {Src, Indices}); } case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16: case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16: case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8: case WebAssembly::BI__builtin_wasm_add_sat_u_i16x8: case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16: case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16: case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8: case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: { unsigned IntNo; switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_add_sat_s_i8x16: case WebAssembly::BI__builtin_wasm_add_sat_s_i16x8: IntNo = Intrinsic::sadd_sat; break; case WebAssembly::BI__builtin_wasm_add_sat_u_i8x16: case 
WebAssembly::BI__builtin_wasm_add_sat_u_i16x8: IntNo = Intrinsic::uadd_sat; break; case WebAssembly::BI__builtin_wasm_sub_sat_s_i8x16: case WebAssembly::BI__builtin_wasm_sub_sat_s_i16x8: IntNo = Intrinsic::wasm_sub_sat_signed; break; case WebAssembly::BI__builtin_wasm_sub_sat_u_i8x16: case WebAssembly::BI__builtin_wasm_sub_sat_u_i16x8: IntNo = Intrinsic::wasm_sub_sat_unsigned; break; default: llvm_unreachable("unexpected builtin ID"); } Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_abs_i8x16: case WebAssembly::BI__builtin_wasm_abs_i16x8: case WebAssembly::BI__builtin_wasm_abs_i32x4: case WebAssembly::BI__builtin_wasm_abs_i64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); Value *Neg = Builder.CreateNeg(Vec, "neg"); Constant *Zero = llvm::Constant::getNullValue(Vec->getType()); Value *ICmp = Builder.CreateICmpSLT(Vec, Zero, "abscond"); return Builder.CreateSelect(ICmp, Neg, Vec, "abs"); } case WebAssembly::BI__builtin_wasm_min_s_i8x16: case WebAssembly::BI__builtin_wasm_min_u_i8x16: case WebAssembly::BI__builtin_wasm_max_s_i8x16: case WebAssembly::BI__builtin_wasm_max_u_i8x16: case WebAssembly::BI__builtin_wasm_min_s_i16x8: case WebAssembly::BI__builtin_wasm_min_u_i16x8: case WebAssembly::BI__builtin_wasm_max_s_i16x8: case WebAssembly::BI__builtin_wasm_max_u_i16x8: case WebAssembly::BI__builtin_wasm_min_s_i32x4: case WebAssembly::BI__builtin_wasm_min_u_i32x4: case WebAssembly::BI__builtin_wasm_max_s_i32x4: case WebAssembly::BI__builtin_wasm_max_u_i32x4: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Value *ICmp; switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_min_s_i8x16: case WebAssembly::BI__builtin_wasm_min_s_i16x8: case WebAssembly::BI__builtin_wasm_min_s_i32x4: ICmp = Builder.CreateICmpSLT(LHS, RHS); break; case WebAssembly::BI__builtin_wasm_min_u_i8x16: case WebAssembly::BI__builtin_wasm_min_u_i16x8: case WebAssembly::BI__builtin_wasm_min_u_i32x4: ICmp = Builder.CreateICmpULT(LHS, RHS); break; case WebAssembly::BI__builtin_wasm_max_s_i8x16: case WebAssembly::BI__builtin_wasm_max_s_i16x8: case WebAssembly::BI__builtin_wasm_max_s_i32x4: ICmp = Builder.CreateICmpSGT(LHS, RHS); break; case WebAssembly::BI__builtin_wasm_max_u_i8x16: case WebAssembly::BI__builtin_wasm_max_u_i16x8: case WebAssembly::BI__builtin_wasm_max_u_i32x4: ICmp = Builder.CreateICmpUGT(LHS, RHS); break; default: llvm_unreachable("unexpected builtin ID"); } return Builder.CreateSelect(ICmp, LHS, RHS); } case WebAssembly::BI__builtin_wasm_avgr_u_i8x16: case WebAssembly::BI__builtin_wasm_avgr_u_i16x8: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_avgr_unsigned, ConvertType(E->getType())); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_q15mulr_sat_s_i16x8: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_q15mulr_sat_signed); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8: case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8: case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4: case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: { Value *Vec = 
EmitScalarExpr(E->getArg(0)); unsigned IntNo; switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_s_i16x8: case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_s_i32x4: IntNo = Intrinsic::wasm_extadd_pairwise_signed; break; case WebAssembly::BI__builtin_wasm_extadd_pairwise_i8x16_u_i16x8: case WebAssembly::BI__builtin_wasm_extadd_pairwise_i16x8_u_i32x4: IntNo = Intrinsic::wasm_extadd_pairwise_unsigned; break; default: llvm_unreachable("unexpected builtin ID"); } Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); return Builder.CreateCall(Callee, Vec); } case WebAssembly::BI__builtin_wasm_bitselect: { Value *V1 = EmitScalarExpr(E->getArg(0)); Value *V2 = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect, ConvertType(E->getType())); return Builder.CreateCall(Callee, {V1, V2, C}); } case WebAssembly::BI__builtin_wasm_dot_s_i32x4_i16x8: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_dot); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_popcnt_i8x16: { Value *Vec = EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(Intrinsic::ctpop, ConvertType(E->getType())); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_any_true_v128: case WebAssembly::BI__builtin_wasm_all_true_i8x16: case WebAssembly::BI__builtin_wasm_all_true_i16x8: case WebAssembly::BI__builtin_wasm_all_true_i32x4: case WebAssembly::BI__builtin_wasm_all_true_i64x2: { unsigned IntNo; switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_any_true_v128: IntNo = Intrinsic::wasm_anytrue; break; case WebAssembly::BI__builtin_wasm_all_true_i8x16: case WebAssembly::BI__builtin_wasm_all_true_i16x8: case WebAssembly::BI__builtin_wasm_all_true_i32x4: case WebAssembly::BI__builtin_wasm_all_true_i64x2: IntNo = Intrinsic::wasm_alltrue; break; default: llvm_unreachable("unexpected builtin ID"); } Value *Vec = EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_bitmask_i8x16: case WebAssembly::BI__builtin_wasm_bitmask_i16x8: case WebAssembly::BI__builtin_wasm_bitmask_i32x4: case WebAssembly::BI__builtin_wasm_bitmask_i64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_abs_f32x4: case WebAssembly::BI__builtin_wasm_abs_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_sqrt_f32x4: case WebAssembly::BI__builtin_wasm_sqrt_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType()); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8: case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8: case WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4: case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: { Value *Low = EmitScalarExpr(E->getArg(0)); Value *High = EmitScalarExpr(E->getArg(1)); unsigned IntNo; switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_narrow_s_i8x16_i16x8: case 
WebAssembly::BI__builtin_wasm_narrow_s_i16x8_i32x4: IntNo = Intrinsic::wasm_narrow_signed; break; case WebAssembly::BI__builtin_wasm_narrow_u_i8x16_i16x8: case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4: IntNo = Intrinsic::wasm_narrow_unsigned; break; default: llvm_unreachable("unexpected builtin ID"); } Function *Callee = CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()}); return Builder.CreateCall(Callee, {Low, High}); } case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4: case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: { Value *Vec = EmitScalarExpr(E->getArg(0)); unsigned IntNo; switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4: IntNo = Intrinsic::fptosi_sat; break; case WebAssembly::BI__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4: IntNo = Intrinsic::fptoui_sat; break; default: llvm_unreachable("unexpected builtin ID"); } llvm::Type *SrcT = Vec->getType(); llvm::Type *TruncT = SrcT->getWithNewType(Builder.getInt32Ty()); Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT}); Value *Trunc = Builder.CreateCall(Callee, Vec); Value *Splat = Constant::getNullValue(TruncT); return Builder.CreateShuffleVector(Trunc, Splat, ArrayRef{0, 1, 2, 3}); } case WebAssembly::BI__builtin_wasm_shuffle_i8x16: { Value *Ops[18]; size_t OpIdx = 0; Ops[OpIdx++] = EmitScalarExpr(E->getArg(0)); Ops[OpIdx++] = EmitScalarExpr(E->getArg(1)); while (OpIdx < 18) { std::optional LaneConst = E->getArg(OpIdx)->getIntegerConstantExpr(getContext()); assert(LaneConst && "Constant arg isn't actually constant?"); Ops[OpIdx++] = llvm::ConstantInt::get(getLLVMContext(), *LaneConst); } Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_shuffle); return Builder.CreateCall(Callee, Ops); } case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8: case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8: case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2: case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: { Value *A = EmitScalarExpr(E->getArg(0)); Value *B = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); unsigned IntNo; switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_relaxed_madd_f16x8: case WebAssembly::BI__builtin_wasm_relaxed_madd_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_madd_f64x2: IntNo = Intrinsic::wasm_relaxed_madd; break; case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f16x8: case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_nmadd_f64x2: IntNo = Intrinsic::wasm_relaxed_nmadd; break; default: llvm_unreachable("unexpected builtin ID"); } Function *Callee = CGM.getIntrinsic(IntNo, A->getType()); return Builder.CreateCall(Callee, {A, B, C}); } case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i8x16: case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i16x8: case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i32x4: case WebAssembly::BI__builtin_wasm_relaxed_laneselect_i64x2: { Value *A = EmitScalarExpr(E->getArg(0)); Value *B = EmitScalarExpr(E->getArg(1)); Value *C = EmitScalarExpr(E->getArg(2)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_laneselect, A->getType()); return Builder.CreateCall(Callee, {A, B, C}); } case WebAssembly::BI__builtin_wasm_relaxed_swizzle_i8x16: { Value *Src = EmitScalarExpr(E->getArg(0)); Value *Indices = EmitScalarExpr(E->getArg(1)); Function *Callee = 
CGM.getIntrinsic(Intrinsic::wasm_relaxed_swizzle); return Builder.CreateCall(Callee, {Src, Indices}); } case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2: case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); unsigned IntNo; switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_relaxed_min_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_min_f64x2: IntNo = Intrinsic::wasm_relaxed_min; break; case WebAssembly::BI__builtin_wasm_relaxed_max_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_max_f64x2: IntNo = Intrinsic::wasm_relaxed_max; break; default: llvm_unreachable("unexpected builtin ID"); } Function *Callee = CGM.getIntrinsic(IntNo, LHS->getType()); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4: case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2: case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: { Value *Vec = EmitScalarExpr(E->getArg(0)); unsigned IntNo; switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_i32x4_f32x4: IntNo = Intrinsic::wasm_relaxed_trunc_signed; break; case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_i32x4_f32x4: IntNo = Intrinsic::wasm_relaxed_trunc_unsigned; break; case WebAssembly::BI__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2: IntNo = Intrinsic::wasm_relaxed_trunc_signed_zero; break; case WebAssembly::BI__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2: IntNo = Intrinsic::wasm_relaxed_trunc_unsigned_zero; break; default: llvm_unreachable("unexpected builtin ID"); } Function *Callee = CGM.getIntrinsic(IntNo); return Builder.CreateCall(Callee, {Vec}); } case WebAssembly::BI__builtin_wasm_relaxed_q15mulr_s_i16x8: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_q15mulr_signed); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed); return Builder.CreateCall(Callee, {LHS, RHS}); } case WebAssembly::BI__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Value *Acc = EmitScalarExpr(E->getArg(2)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed); return Builder.CreateCall(Callee, {LHS, RHS, Acc}); } case WebAssembly::BI__builtin_wasm_relaxed_dot_bf16x8_add_f32_f32x4: { Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); Value *Acc = EmitScalarExpr(E->getArg(2)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_relaxed_dot_bf16x8_add_f32); return Builder.CreateCall(Callee, {LHS, RHS, Acc}); } case WebAssembly::BI__builtin_wasm_loadf16_f32: { Value *Addr = EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_loadf16_f32); return Builder.CreateCall(Callee, {Addr}); } case WebAssembly::BI__builtin_wasm_storef16_f32: { Value *Val = EmitScalarExpr(E->getArg(0)); Value *Addr = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_storef16_f32); 
return Builder.CreateCall(Callee, {Val, Addr}); } case WebAssembly::BI__builtin_wasm_splat_f16x8: { Value *Val = EmitScalarExpr(E->getArg(0)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_splat_f16x8); return Builder.CreateCall(Callee, {Val}); } case WebAssembly::BI__builtin_wasm_extract_lane_f16x8: { Value *Vector = EmitScalarExpr(E->getArg(0)); Value *Index = EmitScalarExpr(E->getArg(1)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_extract_lane_f16x8); return Builder.CreateCall(Callee, {Vector, Index}); } case WebAssembly::BI__builtin_wasm_table_get: { assert(E->getArg(0)->getType()->isArrayType()); Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this); Value *Index = EmitScalarExpr(E->getArg(1)); Function *Callee; if (E->getType().isWebAssemblyExternrefType()) Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_externref); else if (E->getType().isWebAssemblyFuncrefType()) Callee = CGM.getIntrinsic(Intrinsic::wasm_table_get_funcref); else llvm_unreachable( "Unexpected reference type for __builtin_wasm_table_get"); return Builder.CreateCall(Callee, {Table, Index}); } case WebAssembly::BI__builtin_wasm_table_set: { assert(E->getArg(0)->getType()->isArrayType()); Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this); Value *Index = EmitScalarExpr(E->getArg(1)); Value *Val = EmitScalarExpr(E->getArg(2)); Function *Callee; if (E->getArg(2)->getType().isWebAssemblyExternrefType()) Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_externref); else if (E->getArg(2)->getType().isWebAssemblyFuncrefType()) Callee = CGM.getIntrinsic(Intrinsic::wasm_table_set_funcref); else llvm_unreachable( "Unexpected reference type for __builtin_wasm_table_set"); return Builder.CreateCall(Callee, {Table, Index, Val}); } case WebAssembly::BI__builtin_wasm_table_size: { assert(E->getArg(0)->getType()->isArrayType()); Value *Value = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_size); return Builder.CreateCall(Callee, Value); } case WebAssembly::BI__builtin_wasm_table_grow: { assert(E->getArg(0)->getType()->isArrayType()); Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this); Value *Val = EmitScalarExpr(E->getArg(1)); Value *NElems = EmitScalarExpr(E->getArg(2)); Function *Callee; if (E->getArg(1)->getType().isWebAssemblyExternrefType()) Callee = CGM.getIntrinsic(Intrinsic::wasm_table_grow_externref); else if (E->getArg(2)->getType().isWebAssemblyFuncrefType()) Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref); else llvm_unreachable( "Unexpected reference type for __builtin_wasm_table_grow"); return Builder.CreateCall(Callee, {Table, Val, NElems}); } case WebAssembly::BI__builtin_wasm_table_fill: { assert(E->getArg(0)->getType()->isArrayType()); Value *Table = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this); Value *Index = EmitScalarExpr(E->getArg(1)); Value *Val = EmitScalarExpr(E->getArg(2)); Value *NElems = EmitScalarExpr(E->getArg(3)); Function *Callee; if (E->getArg(2)->getType().isWebAssemblyExternrefType()) Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_externref); else if (E->getArg(2)->getType().isWebAssemblyFuncrefType()) Callee = CGM.getIntrinsic(Intrinsic::wasm_table_fill_funcref); else llvm_unreachable( "Unexpected reference type for __builtin_wasm_table_fill"); return Builder.CreateCall(Callee, {Table, Index, Val, NElems}); } case WebAssembly::BI__builtin_wasm_table_copy: { assert(E->getArg(0)->getType()->isArrayType()); 
Value *TableX = EmitArrayToPointerDecay(E->getArg(0)).emitRawPointer(*this); Value *TableY = EmitArrayToPointerDecay(E->getArg(1)).emitRawPointer(*this); Value *DstIdx = EmitScalarExpr(E->getArg(2)); Value *SrcIdx = EmitScalarExpr(E->getArg(3)); Value *NElems = EmitScalarExpr(E->getArg(4)); Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_table_copy); return Builder.CreateCall(Callee, {TableX, TableY, SrcIdx, DstIdx, NElems}); } default: return nullptr; } } static std::pair getIntrinsicForHexagonNonClangBuiltin(unsigned BuiltinID) { struct Info { unsigned BuiltinID; Intrinsic::ID IntrinsicID; unsigned VecLen; }; static Info Infos[] = { #define CUSTOM_BUILTIN_MAPPING(x,s) \ { Hexagon::BI__builtin_HEXAGON_##x, Intrinsic::hexagon_##x, s }, CUSTOM_BUILTIN_MAPPING(L2_loadrub_pci, 0) CUSTOM_BUILTIN_MAPPING(L2_loadrb_pci, 0) CUSTOM_BUILTIN_MAPPING(L2_loadruh_pci, 0) CUSTOM_BUILTIN_MAPPING(L2_loadrh_pci, 0) CUSTOM_BUILTIN_MAPPING(L2_loadri_pci, 0) CUSTOM_BUILTIN_MAPPING(L2_loadrd_pci, 0) CUSTOM_BUILTIN_MAPPING(L2_loadrub_pcr, 0) CUSTOM_BUILTIN_MAPPING(L2_loadrb_pcr, 0) CUSTOM_BUILTIN_MAPPING(L2_loadruh_pcr, 0) CUSTOM_BUILTIN_MAPPING(L2_loadrh_pcr, 0) CUSTOM_BUILTIN_MAPPING(L2_loadri_pcr, 0) CUSTOM_BUILTIN_MAPPING(L2_loadrd_pcr, 0) CUSTOM_BUILTIN_MAPPING(S2_storerb_pci, 0) CUSTOM_BUILTIN_MAPPING(S2_storerh_pci, 0) CUSTOM_BUILTIN_MAPPING(S2_storerf_pci, 0) CUSTOM_BUILTIN_MAPPING(S2_storeri_pci, 0) CUSTOM_BUILTIN_MAPPING(S2_storerd_pci, 0) CUSTOM_BUILTIN_MAPPING(S2_storerb_pcr, 0) CUSTOM_BUILTIN_MAPPING(S2_storerh_pcr, 0) CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0) CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0) CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0) // Legacy builtins that take a vector in place of a vector predicate. CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64) CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64) CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64) CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq, 64) CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq_128B, 128) CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq_128B, 128) CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq_128B, 128) CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentnq_128B, 128) #include "clang/Basic/BuiltinsHexagonMapCustomDep.def" #undef CUSTOM_BUILTIN_MAPPING }; auto CmpInfo = [] (Info A, Info B) { return A.BuiltinID < B.BuiltinID; }; static const bool SortOnce = (llvm::sort(Infos, CmpInfo), true); (void)SortOnce; const Info *F = llvm::lower_bound(Infos, Info{BuiltinID, 0, 0}, CmpInfo); if (F == std::end(Infos) || F->BuiltinID != BuiltinID) return {Intrinsic::not_intrinsic, 0}; return {F->IntrinsicID, F->VecLen}; } Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Intrinsic::ID ID; unsigned VecLen; std::tie(ID, VecLen) = getIntrinsicForHexagonNonClangBuiltin(BuiltinID); auto MakeCircOp = [this, E](unsigned IntID, bool IsLoad) { // The base pointer is passed by address, so it needs to be loaded. Address A = EmitPointerWithAlignment(E->getArg(0)); Address BP = Address(A.emitRawPointer(*this), Int8PtrTy, A.getAlignment()); llvm::Value *Base = Builder.CreateLoad(BP); // The treatment of both loads and stores is the same: the arguments for // the builtin are the same as the arguments for the intrinsic. 
// Load: // builtin(Base, Inc, Mod, Start) -> intr(Base, Inc, Mod, Start) // builtin(Base, Mod, Start) -> intr(Base, Mod, Start) // Store: // builtin(Base, Inc, Mod, Val, Start) -> intr(Base, Inc, Mod, Val, Start) // builtin(Base, Mod, Val, Start) -> intr(Base, Mod, Val, Start) SmallVector Ops = { Base }; for (unsigned i = 1, e = E->getNumArgs(); i != e; ++i) Ops.push_back(EmitScalarExpr(E->getArg(i))); llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops); // The load intrinsics generate two results (Value, NewBase), stores // generate one (NewBase). The new base address needs to be stored. llvm::Value *NewBase = IsLoad ? Builder.CreateExtractValue(Result, 1) : Result; llvm::Value *LV = EmitScalarExpr(E->getArg(0)); Address Dest = EmitPointerWithAlignment(E->getArg(0)); llvm::Value *RetVal = Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment()); if (IsLoad) RetVal = Builder.CreateExtractValue(Result, 0); return RetVal; }; // Handle the conversion of bit-reverse load intrinsics to bit code. // The intrinsic call after this function only reads from memory and the // write to memory is dealt by the store instruction. auto MakeBrevLd = [this, E](unsigned IntID, llvm::Type *DestTy) { // The intrinsic generates one result, which is the new value for the base // pointer. It needs to be returned. The result of the load instruction is // passed to intrinsic by address, so the value needs to be stored. llvm::Value *BaseAddress = EmitScalarExpr(E->getArg(0)); // Expressions like &(*pt++) will be incremented per evaluation. // EmitPointerWithAlignment and EmitScalarExpr evaluates the expression // per call. Address DestAddr = EmitPointerWithAlignment(E->getArg(1)); DestAddr = DestAddr.withElementType(Int8Ty); llvm::Value *DestAddress = DestAddr.emitRawPointer(*this); // Operands are Base, Dest, Modifier. // The intrinsic format in LLVM IR is defined as // { ValueType, i8* } (i8*, i32). llvm::Value *Result = Builder.CreateCall( CGM.getIntrinsic(IntID), {BaseAddress, EmitScalarExpr(E->getArg(2))}); // The value needs to be stored as the variable is passed by reference. llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0); // The store needs to be truncated to fit the destination type. // While i32 and i64 are natively supported on Hexagon, i8 and i16 needs // to be handled with stores of respective destination type. DestVal = Builder.CreateTrunc(DestVal, DestTy); Builder.CreateAlignedStore(DestVal, DestAddress, DestAddr.getAlignment()); // The updated value of the base pointer is returned. return Builder.CreateExtractValue(Result, 1); }; auto V2Q = [this, VecLen] (llvm::Value *Vec) { Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandvrt_128B : Intrinsic::hexagon_V6_vandvrt; return Builder.CreateCall(CGM.getIntrinsic(ID), {Vec, Builder.getInt32(-1)}); }; auto Q2V = [this, VecLen] (llvm::Value *Pred) { Intrinsic::ID ID = VecLen == 128 ? Intrinsic::hexagon_V6_vandqrt_128B : Intrinsic::hexagon_V6_vandqrt; return Builder.CreateCall(CGM.getIntrinsic(ID), {Pred, Builder.getInt32(-1)}); }; switch (BuiltinID) { // These intrinsics return a tuple {Vector, VectorPred} in LLVM IR, // and the corresponding C/C++ builtins use loads/stores to update // the predicate. case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry: case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry: case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: { // Get the type from the 0-th argument. 
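// In outline (as the code below shows): the C builtin passes the carry
// predicate by address in arg 2, so the lowering loads the user's
// vector-typed predicate, converts it to an HVX predicate register with
// V2Q (vandvrt), feeds it to the intrinsic, then converts the carry-out
// back with Q2V (vandqrt) and stores it through the same pointer. The
// vector result is element 0 of the returned {Vector, VectorPred} pair.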
llvm::Type *VecType = ConvertType(E->getArg(0)->getType()); Address PredAddr = EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType); llvm::Value *PredIn = V2Q(Builder.CreateLoad(PredAddr)); llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), PredIn}); llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1); Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this), PredAddr.getAlignment()); return Builder.CreateExtractValue(Result, 0); } // These are identical to the builtins above, except they don't consume // input carry, only generate carry-out. Since they still produce two // outputs, generate the store of the predicate, but no load. case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo: case Hexagon::BI__builtin_HEXAGON_V6_vaddcarryo_128B: case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo: case Hexagon::BI__builtin_HEXAGON_V6_vsubcarryo_128B: { // Get the type from the 0-th argument. llvm::Type *VecType = ConvertType(E->getArg(0)->getType()); Address PredAddr = EmitPointerWithAlignment(E->getArg(2)).withElementType(VecType); llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1))}); llvm::Value *PredOut = Builder.CreateExtractValue(Result, 1); Builder.CreateAlignedStore(Q2V(PredOut), PredAddr.emitRawPointer(*this), PredAddr.getAlignment()); return Builder.CreateExtractValue(Result, 0); } case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq: case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq: case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq: case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq: case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B: case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B: case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B: case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: { SmallVector Ops; const Expr *PredOp = E->getArg(0); // There will be an implicit cast to a boolean vector. Strip it. 
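// (The legacy vmaskedstore* builtins take a plain HVX vector where the
// instruction wants a predicate register, hence the V2Q conversion once
// the frontend's implicit bitcast has been peeled off; the remaining
// arguments are passed through to the intrinsic unchanged.)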
if (auto *Cast = dyn_cast(PredOp)) { if (Cast->getCastKind() == CK_BitCast) PredOp = Cast->getSubExpr(); Ops.push_back(V2Q(EmitScalarExpr(PredOp))); } for (int i = 1, e = E->getNumArgs(); i != e; ++i) Ops.push_back(EmitScalarExpr(E->getArg(i))); return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); } case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci: case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci: case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci: case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci: case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci: case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci: case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr: case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr: case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr: case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr: case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr: case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr: return MakeCircOp(ID, /*IsLoad=*/true); case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci: case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci: case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci: case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci: case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci: case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr: case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr: case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr: case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr: case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr: return MakeCircOp(ID, /*IsLoad=*/false); case Hexagon::BI__builtin_brev_ldub: return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty); case Hexagon::BI__builtin_brev_ldb: return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty); case Hexagon::BI__builtin_brev_lduh: return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty); case Hexagon::BI__builtin_brev_ldh: return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty); case Hexagon::BI__builtin_brev_ldw: return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty); case Hexagon::BI__builtin_brev_ldd: return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty); } // switch return nullptr; } Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { SmallVector Ops; llvm::Type *ResultType = ConvertType(E->getType()); // Find out if any arguments are required to be integer constant expressions. unsigned ICEArguments = 0; ASTContext::GetBuiltinTypeError Error; getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); if (Error == ASTContext::GE_Missing_type) { // Vector intrinsics don't have a type string. 
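// Bit N of ICEArguments marks argument N as one that must be an integer
// constant expression; with no type string to consult, the mask is set by
// hand here (e.g. the index operand of vget_v/vset_v just below).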
assert(BuiltinID >= clang::RISCV::FirstRVVBuiltin && BuiltinID <= clang::RISCV::LastRVVBuiltin); ICEArguments = 0; if (BuiltinID == RISCVVector::BI__builtin_rvv_vget_v || BuiltinID == RISCVVector::BI__builtin_rvv_vset_v) ICEArguments = 1 << 1; } else { assert(Error == ASTContext::GE_None && "Unexpected error"); } if (BuiltinID == RISCV::BI__builtin_riscv_ntl_load) ICEArguments |= (1 << 1); if (BuiltinID == RISCV::BI__builtin_riscv_ntl_store) ICEArguments |= (1 << 2); for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { // Handle aggregate argument, namely RVV tuple types in segment load/store if (hasAggregateEvaluationKind(E->getArg(i)->getType())) { LValue L = EmitAggExprToLValue(E->getArg(i)); llvm::Value *AggValue = Builder.CreateLoad(L.getAddress()); Ops.push_back(AggValue); continue; } Ops.push_back(EmitScalarOrConstFoldImmArg(ICEArguments, i, E)); } Intrinsic::ID ID = Intrinsic::not_intrinsic; unsigned NF = 1; // The 0th bit simulates the `vta` of RVV // The 1st bit simulates the `vma` of RVV constexpr unsigned RVV_VTA = 0x1; constexpr unsigned RVV_VMA = 0x2; int PolicyAttrs = 0; bool IsMasked = false; // Required for overloaded intrinsics. llvm::SmallVector IntrinsicTypes; switch (BuiltinID) { default: llvm_unreachable("unexpected builtin ID"); case RISCV::BI__builtin_riscv_orc_b_32: case RISCV::BI__builtin_riscv_orc_b_64: case RISCV::BI__builtin_riscv_clz_32: case RISCV::BI__builtin_riscv_clz_64: case RISCV::BI__builtin_riscv_ctz_32: case RISCV::BI__builtin_riscv_ctz_64: case RISCV::BI__builtin_riscv_clmul_32: case RISCV::BI__builtin_riscv_clmul_64: case RISCV::BI__builtin_riscv_clmulh_32: case RISCV::BI__builtin_riscv_clmulh_64: case RISCV::BI__builtin_riscv_clmulr_32: case RISCV::BI__builtin_riscv_clmulr_64: case RISCV::BI__builtin_riscv_xperm4_32: case RISCV::BI__builtin_riscv_xperm4_64: case RISCV::BI__builtin_riscv_xperm8_32: case RISCV::BI__builtin_riscv_xperm8_64: case RISCV::BI__builtin_riscv_brev8_32: case RISCV::BI__builtin_riscv_brev8_64: case RISCV::BI__builtin_riscv_zip_32: case RISCV::BI__builtin_riscv_unzip_32: { switch (BuiltinID) { default: llvm_unreachable("unexpected builtin ID"); // Zbb case RISCV::BI__builtin_riscv_orc_b_32: case RISCV::BI__builtin_riscv_orc_b_64: ID = Intrinsic::riscv_orc_b; break; case RISCV::BI__builtin_riscv_clz_32: case RISCV::BI__builtin_riscv_clz_64: { Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, "cast"); return Result; } case RISCV::BI__builtin_riscv_ctz_32: case RISCV::BI__builtin_riscv_ctz_64: { Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType()); Value *Result = Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)}); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, "cast"); return Result; } // Zbc case RISCV::BI__builtin_riscv_clmul_32: case RISCV::BI__builtin_riscv_clmul_64: ID = Intrinsic::riscv_clmul; break; case RISCV::BI__builtin_riscv_clmulh_32: case RISCV::BI__builtin_riscv_clmulh_64: ID = Intrinsic::riscv_clmulh; break; case RISCV::BI__builtin_riscv_clmulr_32: case RISCV::BI__builtin_riscv_clmulr_64: ID = Intrinsic::riscv_clmulr; break; // Zbkx case RISCV::BI__builtin_riscv_xperm8_32: case RISCV::BI__builtin_riscv_xperm8_64: ID = Intrinsic::riscv_xperm8; break; case RISCV::BI__builtin_riscv_xperm4_32: case RISCV::BI__builtin_riscv_xperm4_64: ID = 
Intrinsic::riscv_xperm4; break; // Zbkb case RISCV::BI__builtin_riscv_brev8_32: case RISCV::BI__builtin_riscv_brev8_64: ID = Intrinsic::riscv_brev8; break; case RISCV::BI__builtin_riscv_zip_32: ID = Intrinsic::riscv_zip; break; case RISCV::BI__builtin_riscv_unzip_32: ID = Intrinsic::riscv_unzip; break; } IntrinsicTypes = {ResultType}; break; } // Zk builtins // Zknh case RISCV::BI__builtin_riscv_sha256sig0: ID = Intrinsic::riscv_sha256sig0; break; case RISCV::BI__builtin_riscv_sha256sig1: ID = Intrinsic::riscv_sha256sig1; break; case RISCV::BI__builtin_riscv_sha256sum0: ID = Intrinsic::riscv_sha256sum0; break; case RISCV::BI__builtin_riscv_sha256sum1: ID = Intrinsic::riscv_sha256sum1; break; // Zksed case RISCV::BI__builtin_riscv_sm4ks: ID = Intrinsic::riscv_sm4ks; break; case RISCV::BI__builtin_riscv_sm4ed: ID = Intrinsic::riscv_sm4ed; break; // Zksh case RISCV::BI__builtin_riscv_sm3p0: ID = Intrinsic::riscv_sm3p0; break; case RISCV::BI__builtin_riscv_sm3p1: ID = Intrinsic::riscv_sm3p1; break; // Zihintntl case RISCV::BI__builtin_riscv_ntl_load: { llvm::Type *ResTy = ConvertType(E->getType()); unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL if (Ops.size() == 2) DomainVal = cast(Ops[1])->getZExtValue(); llvm::MDNode *RISCVDomainNode = llvm::MDNode::get( getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal))); llvm::MDNode *NontemporalNode = llvm::MDNode::get( getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); int Width; if(ResTy->isScalableTy()) { const ScalableVectorType *SVTy = cast(ResTy); llvm::Type *ScalarTy = ResTy->getScalarType(); Width = ScalarTy->getPrimitiveSizeInBits() * SVTy->getElementCount().getKnownMinValue(); } else Width = ResTy->getPrimitiveSizeInBits(); LoadInst *Load = Builder.CreateLoad( Address(Ops[0], ResTy, CharUnits::fromQuantity(Width / 8))); Load->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode); Load->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"), RISCVDomainNode); return Load; } case RISCV::BI__builtin_riscv_ntl_store: { unsigned DomainVal = 5; // Default __RISCV_NTLH_ALL if (Ops.size() == 3) DomainVal = cast(Ops[2])->getZExtValue(); llvm::MDNode *RISCVDomainNode = llvm::MDNode::get( getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(DomainVal))); llvm::MDNode *NontemporalNode = llvm::MDNode::get( getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); StoreInst *Store = Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); Store->setMetadata(llvm::LLVMContext::MD_nontemporal, NontemporalNode); Store->setMetadata(CGM.getModule().getMDKindID("riscv-nontemporal-domain"), RISCVDomainNode); return Store; } // Vector builtins are handled from here. #include "clang/Basic/riscv_vector_builtin_cg.inc" // SiFive Vector builtins are handled from here. #include "clang/Basic/riscv_sifive_vector_builtin_cg.inc" } assert(ID != Intrinsic::not_intrinsic); llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); return Builder.CreateCall(F, Ops, ""); } diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp index cf5e29e5a3db..151505baf38d 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenModule.cpp @@ -1,7793 +1,7795 @@ //===--- CodeGenModule.cpp - Emit LLVM Code from ASTs for a Module --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This coordinates the per-module state used while generating code. // //===----------------------------------------------------------------------===// #include "CodeGenModule.h" #include "ABIInfo.h" #include "CGBlocks.h" #include "CGCUDARuntime.h" #include "CGCXXABI.h" #include "CGCall.h" #include "CGDebugInfo.h" #include "CGHLSLRuntime.h" #include "CGObjCRuntime.h" #include "CGOpenCLRuntime.h" #include "CGOpenMPRuntime.h" #include "CGOpenMPRuntimeGPU.h" #include "CodeGenFunction.h" #include "CodeGenPGO.h" #include "ConstantEmitter.h" #include "CoverageMappingGen.h" #include "TargetInfo.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTLambda.h" #include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Mangle.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/FileManager.h" #include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/Version.h" #include "clang/CodeGen/BackendUtil.h" #include "clang/CodeGen/ConstantInitBuilder.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/AttributeMask.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include "llvm/TargetParser/RISCVISAInfo.h" #include "llvm/TargetParser/Triple.h" #include "llvm/TargetParser/X86TargetParser.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include using namespace clang; using namespace CodeGen; static llvm::cl::opt LimitedCoverage( "limited-coverage-experimental", llvm::cl::Hidden, llvm::cl::desc("Emit limited coverage mapping information (experimental)")); static const char AnnotationSection[] = "llvm.metadata"; static CGCXXABI *createCXXABI(CodeGenModule &CGM) { switch (CGM.getContext().getCXXABIKind()) { case TargetCXXABI::AppleARM64: case TargetCXXABI::Fuchsia: case TargetCXXABI::GenericAArch64: case TargetCXXABI::GenericARM: case TargetCXXABI::iOS: case TargetCXXABI::WatchOS: case TargetCXXABI::GenericMIPS: case TargetCXXABI::GenericItanium: case TargetCXXABI::WebAssembly: case TargetCXXABI::XL: return CreateItaniumCXXABI(CGM); case TargetCXXABI::Microsoft: return CreateMicrosoftCXXABI(CGM); } llvm_unreachable("invalid C++ ABI kind"); } static std::unique_ptr createTargetCodeGenInfo(CodeGenModule &CGM) { const TargetInfo &Target = CGM.getTarget(); const llvm::Triple &Triple = Target.getTriple(); 
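// The switch below picks the TargetCodeGenInfo that implements the
// calling-convention/ABI lowering for this module, keyed on the triple's
// architecture and refined by the ABI string and float-ABI options
// (e.g. riscv64 with the lp64d ABI yields XLen = 64 and ABIFLen = 64).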
const CodeGenOptions &CodeGenOpts = CGM.getCodeGenOpts(); switch (Triple.getArch()) { default: return createDefaultTargetCodeGenInfo(CGM); case llvm::Triple::le32: return createPNaClTargetCodeGenInfo(CGM); case llvm::Triple::m68k: return createM68kTargetCodeGenInfo(CGM); case llvm::Triple::mips: case llvm::Triple::mipsel: if (Triple.getOS() == llvm::Triple::NaCl) return createPNaClTargetCodeGenInfo(CGM); return createMIPSTargetCodeGenInfo(CGM, /*IsOS32=*/true); case llvm::Triple::mips64: case llvm::Triple::mips64el: return createMIPSTargetCodeGenInfo(CGM, /*IsOS32=*/false); case llvm::Triple::avr: { // For passing parameters, R8~R25 are used on avr, and R18~R25 are used // on avrtiny. For passing return value, R18~R25 are used on avr, and // R22~R25 are used on avrtiny. unsigned NPR = Target.getABI() == "avrtiny" ? 6 : 18; unsigned NRR = Target.getABI() == "avrtiny" ? 4 : 8; return createAVRTargetCodeGenInfo(CGM, NPR, NRR); } case llvm::Triple::aarch64: case llvm::Triple::aarch64_32: case llvm::Triple::aarch64_be: { AArch64ABIKind Kind = AArch64ABIKind::AAPCS; if (Target.getABI() == "darwinpcs") Kind = AArch64ABIKind::DarwinPCS; else if (Triple.isOSWindows()) return createWindowsAArch64TargetCodeGenInfo(CGM, AArch64ABIKind::Win64); else if (Target.getABI() == "aapcs-soft") Kind = AArch64ABIKind::AAPCSSoft; else if (Target.getABI() == "pauthtest") Kind = AArch64ABIKind::PAuthTest; return createAArch64TargetCodeGenInfo(CGM, Kind); } case llvm::Triple::wasm32: case llvm::Triple::wasm64: { WebAssemblyABIKind Kind = WebAssemblyABIKind::MVP; if (Target.getABI() == "experimental-mv") Kind = WebAssemblyABIKind::ExperimentalMV; return createWebAssemblyTargetCodeGenInfo(CGM, Kind); } case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { if (Triple.getOS() == llvm::Triple::Win32) return createWindowsARMTargetCodeGenInfo(CGM, ARMABIKind::AAPCS_VFP); ARMABIKind Kind = ARMABIKind::AAPCS; StringRef ABIStr = Target.getABI(); if (ABIStr == "apcs-gnu") Kind = ARMABIKind::APCS; else if (ABIStr == "aapcs16") Kind = ARMABIKind::AAPCS16_VFP; else if (CodeGenOpts.FloatABI == "hard" || - (CodeGenOpts.FloatABI != "soft" && - (Triple.getEnvironment() == llvm::Triple::GNUEABIHF || - Triple.getEnvironment() == llvm::Triple::MuslEABIHF || - Triple.getEnvironment() == llvm::Triple::EABIHF))) + (CodeGenOpts.FloatABI != "soft" && Triple.isHardFloatABI())) Kind = ARMABIKind::AAPCS_VFP; return createARMTargetCodeGenInfo(CGM, Kind); } case llvm::Triple::ppc: { if (Triple.isOSAIX()) return createAIXTargetCodeGenInfo(CGM, /*Is64Bit=*/false); bool IsSoftFloat = CodeGenOpts.FloatABI == "soft" || Target.hasFeature("spe"); return createPPC32TargetCodeGenInfo(CGM, IsSoftFloat); } case llvm::Triple::ppcle: { bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; return createPPC32TargetCodeGenInfo(CGM, IsSoftFloat); } case llvm::Triple::ppc64: if (Triple.isOSAIX()) return createAIXTargetCodeGenInfo(CGM, /*Is64Bit=*/true); if (Triple.isOSBinFormatELF()) { PPC64_SVR4_ABIKind Kind = PPC64_SVR4_ABIKind::ELFv1; if (Target.getABI() == "elfv2") Kind = PPC64_SVR4_ABIKind::ELFv2; bool IsSoftFloat = CodeGenOpts.FloatABI == "soft"; return createPPC64_SVR4_TargetCodeGenInfo(CGM, Kind, IsSoftFloat); } return createPPC64TargetCodeGenInfo(CGM); case llvm::Triple::ppc64le: { assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!"); PPC64_SVR4_ABIKind Kind = PPC64_SVR4_ABIKind::ELFv2; if (Target.getABI() == "elfv1") Kind = PPC64_SVR4_ABIKind::ELFv1; bool IsSoftFloat = 
CodeGenOpts.FloatABI == "soft"; return createPPC64_SVR4_TargetCodeGenInfo(CGM, Kind, IsSoftFloat); } case llvm::Triple::nvptx: case llvm::Triple::nvptx64: return createNVPTXTargetCodeGenInfo(CGM); case llvm::Triple::msp430: return createMSP430TargetCodeGenInfo(CGM); case llvm::Triple::riscv32: case llvm::Triple::riscv64: { StringRef ABIStr = Target.getABI(); unsigned XLen = Target.getPointerWidth(LangAS::Default); unsigned ABIFLen = 0; if (ABIStr.ends_with("f")) ABIFLen = 32; else if (ABIStr.ends_with("d")) ABIFLen = 64; bool EABI = ABIStr.ends_with("e"); return createRISCVTargetCodeGenInfo(CGM, XLen, ABIFLen, EABI); } case llvm::Triple::systemz: { bool SoftFloat = CodeGenOpts.FloatABI == "soft"; bool HasVector = !SoftFloat && Target.getABI() == "vector"; return createSystemZTargetCodeGenInfo(CGM, HasVector, SoftFloat); } case llvm::Triple::tce: case llvm::Triple::tcele: return createTCETargetCodeGenInfo(CGM); case llvm::Triple::x86: { bool IsDarwinVectorABI = Triple.isOSDarwin(); bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing(); if (Triple.getOS() == llvm::Triple::Win32) { return createWinX86_32TargetCodeGenInfo( CGM, IsDarwinVectorABI, IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters); } return createX86_32TargetCodeGenInfo( CGM, IsDarwinVectorABI, IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, CodeGenOpts.FloatABI == "soft"); } case llvm::Triple::x86_64: { StringRef ABI = Target.getABI(); X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 : ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None); switch (Triple.getOS()) { case llvm::Triple::Win32: return createWinX86_64TargetCodeGenInfo(CGM, AVXLevel); default: return createX86_64TargetCodeGenInfo(CGM, AVXLevel); } } case llvm::Triple::hexagon: return createHexagonTargetCodeGenInfo(CGM); case llvm::Triple::lanai: return createLanaiTargetCodeGenInfo(CGM); case llvm::Triple::r600: return createAMDGPUTargetCodeGenInfo(CGM); case llvm::Triple::amdgcn: return createAMDGPUTargetCodeGenInfo(CGM); case llvm::Triple::sparc: return createSparcV8TargetCodeGenInfo(CGM); case llvm::Triple::sparcv9: return createSparcV9TargetCodeGenInfo(CGM); case llvm::Triple::xcore: return createXCoreTargetCodeGenInfo(CGM); case llvm::Triple::arc: return createARCTargetCodeGenInfo(CGM); case llvm::Triple::spir: case llvm::Triple::spir64: return createCommonSPIRTargetCodeGenInfo(CGM); case llvm::Triple::spirv32: case llvm::Triple::spirv64: return createSPIRVTargetCodeGenInfo(CGM); case llvm::Triple::ve: return createVETargetCodeGenInfo(CGM); case llvm::Triple::csky: { bool IsSoftFloat = !Target.hasFeature("hard-float-abi"); bool hasFP64 = Target.hasFeature("fpuv2_df") || Target.hasFeature("fpuv3_df"); return createCSKYTargetCodeGenInfo(CGM, IsSoftFloat ? 0 : hasFP64 ? 
64 : 32); } case llvm::Triple::bpfeb: case llvm::Triple::bpfel: return createBPFTargetCodeGenInfo(CGM); case llvm::Triple::loongarch32: case llvm::Triple::loongarch64: { StringRef ABIStr = Target.getABI(); unsigned ABIFRLen = 0; if (ABIStr.ends_with("f")) ABIFRLen = 32; else if (ABIStr.ends_with("d")) ABIFRLen = 64; return createLoongArchTargetCodeGenInfo( CGM, Target.getPointerWidth(LangAS::Default), ABIFRLen); } } } const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { if (!TheTargetCodeGenInfo) TheTargetCodeGenInfo = createTargetCodeGenInfo(*this); return *TheTargetCodeGenInfo; } CodeGenModule::CodeGenModule(ASTContext &C, IntrusiveRefCntPtr FS, const HeaderSearchOptions &HSO, const PreprocessorOptions &PPO, const CodeGenOptions &CGO, llvm::Module &M, DiagnosticsEngine &diags, CoverageSourceInfo *CoverageInfo) : Context(C), LangOpts(C.getLangOpts()), FS(FS), HeaderSearchOpts(HSO), PreprocessorOpts(PPO), CodeGenOpts(CGO), TheModule(M), Diags(diags), Target(C.getTargetInfo()), ABI(createCXXABI(*this)), VMContext(M.getContext()), VTables(*this), SanitizerMD(new SanitizerMetadata(*this)) { // Initialize the type cache. Types.reset(new CodeGenTypes(*this)); llvm::LLVMContext &LLVMContext = M.getContext(); VoidTy = llvm::Type::getVoidTy(LLVMContext); Int8Ty = llvm::Type::getInt8Ty(LLVMContext); Int16Ty = llvm::Type::getInt16Ty(LLVMContext); Int32Ty = llvm::Type::getInt32Ty(LLVMContext); Int64Ty = llvm::Type::getInt64Ty(LLVMContext); HalfTy = llvm::Type::getHalfTy(LLVMContext); BFloatTy = llvm::Type::getBFloatTy(LLVMContext); FloatTy = llvm::Type::getFloatTy(LLVMContext); DoubleTy = llvm::Type::getDoubleTy(LLVMContext); PointerWidthInBits = C.getTargetInfo().getPointerWidth(LangAS::Default); PointerAlignInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getPointerAlign(LangAS::Default)) .getQuantity(); SizeSizeInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getMaxPointerWidth()).getQuantity(); IntAlignInBytes = C.toCharUnitsFromBits(C.getTargetInfo().getIntAlign()).getQuantity(); CharTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getCharWidth()); IntTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getIntWidth()); IntPtrTy = llvm::IntegerType::get(LLVMContext, C.getTargetInfo().getMaxPointerWidth()); Int8PtrTy = llvm::PointerType::get(LLVMContext, C.getTargetAddressSpace(LangAS::Default)); const llvm::DataLayout &DL = M.getDataLayout(); AllocaInt8PtrTy = llvm::PointerType::get(LLVMContext, DL.getAllocaAddrSpace()); GlobalsInt8PtrTy = llvm::PointerType::get(LLVMContext, DL.getDefaultGlobalsAddressSpace()); ConstGlobalsPtrTy = llvm::PointerType::get( LLVMContext, C.getTargetAddressSpace(GetGlobalConstantAddressSpace())); ASTAllocaAddressSpace = getTargetCodeGenInfo().getASTAllocaAddressSpace(); // Build C++20 Module initializers. // TODO: Add Microsoft here once we know the mangling required for the // initializers. CXX20ModuleInits = LangOpts.CPlusPlusModules && getCXXABI().getMangleContext().getKind() == ItaniumMangleContext::MK_Itanium; RuntimeCC = getTargetCodeGenInfo().getABIInfo().getRuntimeCC(); if (LangOpts.ObjC) createObjCRuntime(); if (LangOpts.OpenCL) createOpenCLRuntime(); if (LangOpts.OpenMP) createOpenMPRuntime(); if (LangOpts.CUDA) createCUDARuntime(); if (LangOpts.HLSL) createHLSLRuntime(); // Enable TBAA unless it's suppressed. ThreadSanitizer needs TBAA even at O0. 
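// (TSan's instrumentation pass reads the !tbaa metadata, e.g. to recognize
// vtable pointer stores, which is presumably why TBAA is kept here even
// when the optimization-level gate below would otherwise drop it.)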
if (LangOpts.Sanitize.has(SanitizerKind::Thread) || (!CodeGenOpts.RelaxedAliasing && CodeGenOpts.OptimizationLevel > 0)) TBAA.reset(new CodeGenTBAA(Context, getTypes(), TheModule, CodeGenOpts, getLangOpts())); // If debug info or coverage generation is enabled, create the CGDebugInfo // object. if (CodeGenOpts.getDebugInfo() != llvm::codegenoptions::NoDebugInfo || CodeGenOpts.CoverageNotesFile.size() || CodeGenOpts.CoverageDataFile.size()) DebugInfo.reset(new CGDebugInfo(*this)); Block.GlobalUniqueCount = 0; if (C.getLangOpts().ObjC) ObjCData.reset(new ObjCEntrypoints()); if (CodeGenOpts.hasProfileClangUse()) { auto ReaderOrErr = llvm::IndexedInstrProfReader::create( CodeGenOpts.ProfileInstrumentUsePath, *FS, CodeGenOpts.ProfileRemappingFile); // We're checking for profile read errors in CompilerInvocation, so if // there was an error it should've already been caught. If it hasn't been // somehow, trip an assertion. assert(ReaderOrErr); PGOReader = std::move(ReaderOrErr.get()); } // If coverage mapping generation is enabled, create the // CoverageMappingModuleGen object. if (CodeGenOpts.CoverageMapping) CoverageMapping.reset(new CoverageMappingModuleGen(*this, *CoverageInfo)); // Generate the module name hash here if needed. if (CodeGenOpts.UniqueInternalLinkageNames && !getModule().getSourceFileName().empty()) { std::string Path = getModule().getSourceFileName(); // Check if a path substitution is needed from the MacroPrefixMap. for (const auto &Entry : LangOpts.MacroPrefixMap) if (Path.rfind(Entry.first, 0) != std::string::npos) { Path = Entry.second + Path.substr(Entry.first.size()); break; } ModuleNameHash = llvm::getUniqueInternalLinkagePostfix(Path); } // Record mregparm value now so it is visible through all of codegen. if (Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86) getModule().addModuleFlag(llvm::Module::Error, "NumRegisterParameters", CodeGenOpts.NumRegisterParameters); } CodeGenModule::~CodeGenModule() {} void CodeGenModule::createObjCRuntime() { // This is just isGNUFamily(), but we want to force implementors of // new ABIs to decide how best to do this. switch (LangOpts.ObjCRuntime.getKind()) { case ObjCRuntime::GNUstep: case ObjCRuntime::GCC: case ObjCRuntime::ObjFW: ObjCRuntime.reset(CreateGNUObjCRuntime(*this)); return; case ObjCRuntime::FragileMacOSX: case ObjCRuntime::MacOSX: case ObjCRuntime::iOS: case ObjCRuntime::WatchOS: ObjCRuntime.reset(CreateMacObjCRuntime(*this)); return; } llvm_unreachable("bad runtime kind"); } void CodeGenModule::createOpenCLRuntime() { OpenCLRuntime.reset(new CGOpenCLRuntime(*this)); } void CodeGenModule::createOpenMPRuntime() { // Select a specialized code generation class based on the target, if any. // If it does not exist use the default implementation. 
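  // Illustrative note: which branch below fires is driven by the language
  // options, e.g. (assuming the current cc1 spellings) -fopenmp selects the
  // generic CGOpenMPRuntime, -fopenmp-simd the SIMD-only variant, and
  // -fopenmp-is-target-device on an NVPTX/AMDGCN triple the GPU runtime.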
switch (getTriple().getArch()) { case llvm::Triple::nvptx: case llvm::Triple::nvptx64: case llvm::Triple::amdgcn: assert(getLangOpts().OpenMPIsTargetDevice && "OpenMP AMDGPU/NVPTX is only prepared to deal with device code."); OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this)); break; default: if (LangOpts.OpenMPSimd) OpenMPRuntime.reset(new CGOpenMPSIMDRuntime(*this)); else OpenMPRuntime.reset(new CGOpenMPRuntime(*this)); break; } } void CodeGenModule::createCUDARuntime() { CUDARuntime.reset(CreateNVCUDARuntime(*this)); } void CodeGenModule::createHLSLRuntime() { HLSLRuntime.reset(new CGHLSLRuntime(*this)); } void CodeGenModule::addReplacement(StringRef Name, llvm::Constant *C) { Replacements[Name] = C; } void CodeGenModule::applyReplacements() { for (auto &I : Replacements) { StringRef MangledName = I.first; llvm::Constant *Replacement = I.second; llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (!Entry) continue; auto *OldF = cast(Entry); auto *NewF = dyn_cast(Replacement); if (!NewF) { if (auto *Alias = dyn_cast(Replacement)) { NewF = dyn_cast(Alias->getAliasee()); } else { auto *CE = cast(Replacement); assert(CE->getOpcode() == llvm::Instruction::BitCast || CE->getOpcode() == llvm::Instruction::GetElementPtr); NewF = dyn_cast(CE->getOperand(0)); } } // Replace old with new, but keep the old order. OldF->replaceAllUsesWith(Replacement); if (NewF) { NewF->removeFromParent(); OldF->getParent()->getFunctionList().insertAfter(OldF->getIterator(), NewF); } OldF->eraseFromParent(); } } void CodeGenModule::addGlobalValReplacement(llvm::GlobalValue *GV, llvm::Constant *C) { GlobalValReplacements.push_back(std::make_pair(GV, C)); } void CodeGenModule::applyGlobalValReplacements() { for (auto &I : GlobalValReplacements) { llvm::GlobalValue *GV = I.first; llvm::Constant *C = I.second; GV->replaceAllUsesWith(C); GV->eraseFromParent(); } } // This is only used in aliases that we created and we know they have a // linear structure. static const llvm::GlobalValue *getAliasedGlobal(const llvm::GlobalValue *GV) { const llvm::Constant *C; if (auto *GA = dyn_cast(GV)) C = GA->getAliasee(); else if (auto *GI = dyn_cast(GV)) C = GI->getResolver(); else return GV; const auto *AliaseeGV = dyn_cast(C->stripPointerCasts()); if (!AliaseeGV) return nullptr; const llvm::GlobalValue *FinalGV = AliaseeGV->getAliaseeObject(); if (FinalGV == GV) return nullptr; return FinalGV; } static bool checkAliasedGlobal( const ASTContext &Context, DiagnosticsEngine &Diags, SourceLocation Location, bool IsIFunc, const llvm::GlobalValue *Alias, const llvm::GlobalValue *&GV, const llvm::MapVector &MangledDeclNames, SourceRange AliasRange) { GV = getAliasedGlobal(Alias); if (!GV) { Diags.Report(Location, diag::err_cyclic_alias) << IsIFunc; return false; } if (GV->hasCommonLinkage()) { const llvm::Triple &Triple = Context.getTargetInfo().getTriple(); if (Triple.getObjectFormat() == llvm::Triple::XCOFF) { Diags.Report(Location, diag::err_alias_to_common); return false; } } if (GV->isDeclaration()) { Diags.Report(Location, diag::err_alias_to_undefined) << IsIFunc << IsIFunc; Diags.Report(Location, diag::note_alias_requires_mangled_name) << IsIFunc << IsIFunc; // Provide a note if the given function is not found and exists as a // mangled name. for (const auto &[Decl, Name] : MangledDeclNames) { if (const auto *ND = dyn_cast(Decl.getDecl())) { if (ND->getName() == GV->getName()) { Diags.Report(Location, diag::note_alias_mangled_name_alternative) << Name << FixItHint::CreateReplacement( AliasRange, (Twine(IsIFunc ? 
"ifunc" : "alias") + "(\"" + Name + "\")") .str()); } } } return false; } if (IsIFunc) { // Check resolver function type. const auto *F = dyn_cast(GV); if (!F) { Diags.Report(Location, diag::err_alias_to_undefined) << IsIFunc << IsIFunc; return false; } llvm::FunctionType *FTy = F->getFunctionType(); if (!FTy->getReturnType()->isPointerTy()) { Diags.Report(Location, diag::err_ifunc_resolver_return); return false; } } return true; } // Emit a warning if toc-data attribute is requested for global variables that // have aliases and remove the toc-data attribute. static void checkAliasForTocData(llvm::GlobalVariable *GVar, const CodeGenOptions &CodeGenOpts, DiagnosticsEngine &Diags, SourceLocation Location) { if (GVar->hasAttribute("toc-data")) { auto GVId = GVar->getName(); // Is this a global variable specified by the user as local? if ((llvm::binary_search(CodeGenOpts.TocDataVarsUserSpecified, GVId))) { Diags.Report(Location, diag::warn_toc_unsupported_type) << GVId << "the variable has an alias"; } llvm::AttributeSet CurrAttributes = GVar->getAttributes(); llvm::AttributeSet NewAttributes = CurrAttributes.removeAttribute(GVar->getContext(), "toc-data"); GVar->setAttributes(NewAttributes); } } void CodeGenModule::checkAliases() { // Check if the constructed aliases are well formed. It is really unfortunate // that we have to do this in CodeGen, but we only construct mangled names // and aliases during codegen. bool Error = false; DiagnosticsEngine &Diags = getDiags(); for (const GlobalDecl &GD : Aliases) { const auto *D = cast(GD.getDecl()); SourceLocation Location; SourceRange Range; bool IsIFunc = D->hasAttr(); if (const Attr *A = D->getDefiningAttr()) { Location = A->getLocation(); Range = A->getRange(); } else llvm_unreachable("Not an alias or ifunc?"); StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Alias = GetGlobalValue(MangledName); const llvm::GlobalValue *GV = nullptr; if (!checkAliasedGlobal(getContext(), Diags, Location, IsIFunc, Alias, GV, MangledDeclNames, Range)) { Error = true; continue; } if (getContext().getTargetInfo().getTriple().isOSAIX()) if (const llvm::GlobalVariable *GVar = dyn_cast(GV)) checkAliasForTocData(const_cast(GVar), getCodeGenOpts(), Diags, Location); llvm::Constant *Aliasee = IsIFunc ? cast(Alias)->getResolver() : cast(Alias)->getAliasee(); llvm::GlobalValue *AliaseeGV; if (auto CE = dyn_cast(Aliasee)) AliaseeGV = cast(CE->getOperand(0)); else AliaseeGV = cast(Aliasee); if (const SectionAttr *SA = D->getAttr()) { StringRef AliasSection = SA->getName(); if (AliasSection != AliaseeGV->getSection()) Diags.Report(SA->getLocation(), diag::warn_alias_with_section) << AliasSection << IsIFunc << IsIFunc; } // We have to handle alias to weak aliases in here. LLVM itself disallows // this since the object semantics would not match the IL one. For // compatibility with gcc we implement it by just pointing the alias // to its aliasee's aliasee. We also warn, since the user is probably // expecting the link to be weak. if (auto *GA = dyn_cast(AliaseeGV)) { if (GA->isInterposable()) { Diags.Report(Location, diag::warn_alias_to_weak_alias) << GV->getName() << GA->getName() << IsIFunc; Aliasee = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( GA->getAliasee(), Alias->getType()); if (IsIFunc) cast(Alias)->setResolver(Aliasee); else cast(Alias)->setAliasee(Aliasee); } } // ifunc resolvers are usually implemented to run before sanitizer // initialization. Disable instrumentation to prevent the ordering issue. 
if (IsIFunc) cast(Aliasee)->addFnAttr( llvm::Attribute::DisableSanitizerInstrumentation); } if (!Error) return; for (const GlobalDecl &GD : Aliases) { StringRef MangledName = getMangledName(GD); llvm::GlobalValue *Alias = GetGlobalValue(MangledName); Alias->replaceAllUsesWith(llvm::UndefValue::get(Alias->getType())); Alias->eraseFromParent(); } } void CodeGenModule::clear() { DeferredDeclsToEmit.clear(); EmittedDeferredDecls.clear(); DeferredAnnotations.clear(); if (OpenMPRuntime) OpenMPRuntime->clear(); } void InstrProfStats::reportDiagnostics(DiagnosticsEngine &Diags, StringRef MainFile) { if (!hasDiagnostics()) return; if (VisitedInMainFile > 0 && VisitedInMainFile == MissingInMainFile) { if (MainFile.empty()) MainFile = ""; Diags.Report(diag::warn_profile_data_unprofiled) << MainFile; } else { if (Mismatched > 0) Diags.Report(diag::warn_profile_data_out_of_date) << Visited << Mismatched; if (Missing > 0) Diags.Report(diag::warn_profile_data_missing) << Visited << Missing; } } static std::optional getLLVMVisibility(clang::LangOptions::VisibilityFromDLLStorageClassKinds K) { // Map to LLVM visibility. switch (K) { case clang::LangOptions::VisibilityFromDLLStorageClassKinds::Keep: return std::nullopt; case clang::LangOptions::VisibilityFromDLLStorageClassKinds::Default: return llvm::GlobalValue::DefaultVisibility; case clang::LangOptions::VisibilityFromDLLStorageClassKinds::Hidden: return llvm::GlobalValue::HiddenVisibility; case clang::LangOptions::VisibilityFromDLLStorageClassKinds::Protected: return llvm::GlobalValue::ProtectedVisibility; } llvm_unreachable("unknown option value!"); } void setLLVMVisibility(llvm::GlobalValue &GV, std::optional V) { if (!V) return; // Reset DSO locality before setting the visibility. This removes // any effects that visibility options and annotations may have // had on the DSO locality. Setting the visibility will implicitly set // appropriate globals to DSO Local; however, this will be pessimistic // w.r.t. to the normal compiler IRGen. GV.setDSOLocal(false); GV.setVisibility(*V); } static void setVisibilityFromDLLStorageClass(const clang::LangOptions &LO, llvm::Module &M) { if (!LO.VisibilityFromDLLStorageClass) return; std::optional DLLExportVisibility = getLLVMVisibility(LO.getDLLExportVisibility()); std::optional NoDLLStorageClassVisibility = getLLVMVisibility(LO.getNoDLLStorageClassVisibility()); std::optional ExternDeclDLLImportVisibility = getLLVMVisibility(LO.getExternDeclDLLImportVisibility()); std::optional ExternDeclNoDLLStorageClassVisibility = getLLVMVisibility(LO.getExternDeclNoDLLStorageClassVisibility()); for (llvm::GlobalValue &GV : M.global_values()) { if (GV.hasAppendingLinkage() || GV.hasLocalLinkage()) continue; if (GV.isDeclarationForLinker()) setLLVMVisibility(GV, GV.getDLLStorageClass() == llvm::GlobalValue::DLLImportStorageClass ? ExternDeclDLLImportVisibility : ExternDeclNoDLLStorageClassVisibility); else setLLVMVisibility(GV, GV.getDLLStorageClass() == llvm::GlobalValue::DLLExportStorageClass ? 
DLLExportVisibility : NoDLLStorageClassVisibility); GV.setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); } } static bool isStackProtectorOn(const LangOptions &LangOpts, const llvm::Triple &Triple, clang::LangOptions::StackProtectorMode Mode) { if (Triple.isAMDGPU() || Triple.isNVPTX()) return false; return LangOpts.getStackProtector() == Mode; } void CodeGenModule::Release() { Module *Primary = getContext().getCurrentNamedModule(); if (CXX20ModuleInits && Primary && !Primary->isHeaderLikeModule()) EmitModuleInitializers(Primary); EmitDeferred(); DeferredDecls.insert(EmittedDeferredDecls.begin(), EmittedDeferredDecls.end()); EmittedDeferredDecls.clear(); EmitVTablesOpportunistically(); applyGlobalValReplacements(); applyReplacements(); emitMultiVersionFunctions(); if (Context.getLangOpts().IncrementalExtensions && GlobalTopLevelStmtBlockInFlight.first) { const TopLevelStmtDecl *TLSD = GlobalTopLevelStmtBlockInFlight.second; GlobalTopLevelStmtBlockInFlight.first->FinishFunction(TLSD->getEndLoc()); GlobalTopLevelStmtBlockInFlight = {nullptr, nullptr}; } // Module implementations are initialized the same way as a regular TU that // imports one or more modules. if (CXX20ModuleInits && Primary && Primary->isInterfaceOrPartition()) EmitCXXModuleInitFunc(Primary); else EmitCXXGlobalInitFunc(); EmitCXXGlobalCleanUpFunc(); registerGlobalDtorsWithAtExit(); EmitCXXThreadLocalInitFunc(); if (ObjCRuntime) if (llvm::Function *ObjCInitFunction = ObjCRuntime->ModuleInitFunction()) AddGlobalCtor(ObjCInitFunction); if (Context.getLangOpts().CUDA && CUDARuntime) { if (llvm::Function *CudaCtorFunction = CUDARuntime->finalizeModule()) AddGlobalCtor(CudaCtorFunction); } if (OpenMPRuntime) { OpenMPRuntime->createOffloadEntriesAndInfoMetadata(); OpenMPRuntime->clear(); } if (PGOReader) { getModule().setProfileSummary( PGOReader->getSummary(/* UseCS */ false).getMD(VMContext), llvm::ProfileSummary::PSK_Instr); if (PGOStats.hasDiagnostics()) PGOStats.reportDiagnostics(getDiags(), getCodeGenOpts().MainFileName); } llvm::stable_sort(GlobalCtors, [](const Structor &L, const Structor &R) { return L.LexOrder < R.LexOrder; }); EmitCtorList(GlobalCtors, "llvm.global_ctors"); EmitCtorList(GlobalDtors, "llvm.global_dtors"); EmitGlobalAnnotations(); EmitStaticExternCAliases(); checkAliases(); EmitDeferredUnusedCoverageMappings(); CodeGenPGO(*this).setValueProfilingFlag(getModule()); CodeGenPGO(*this).setProfileVersion(getModule()); if (CoverageMapping) CoverageMapping->emit(); if (CodeGenOpts.SanitizeCfiCrossDso) { CodeGenFunction(*this).EmitCfiCheckFail(); CodeGenFunction(*this).EmitCfiCheckStub(); } if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) finalizeKCFITypes(); emitAtAvailableLinkGuard(); if (Context.getTargetInfo().getTriple().isWasm()) EmitMainVoidAlias(); if (getTriple().isAMDGPU() || (getTriple().isSPIRV() && getTriple().getVendor() == llvm::Triple::AMD)) { // Emit amdhsa_code_object_version module flag, which is code object version // times 100. if (getTarget().getTargetOpts().CodeObjectVersion != llvm::CodeObjectVersionKind::COV_None) { getModule().addModuleFlag(llvm::Module::Error, "amdhsa_code_object_version", getTarget().getTargetOpts().CodeObjectVersion); } // Currently, "-mprintf-kind" option is only supported for HIP if (LangOpts.HIP) { auto *MDStr = llvm::MDString::get( getLLVMContext(), (getTarget().getTargetOpts().AMDGPUPrintfKindVal == TargetOptions::AMDGPUPrintfKind::Hostcall) ? 
"hostcall" : "buffered"); getModule().addModuleFlag(llvm::Module::Error, "amdgpu_printf_kind", MDStr); } } // Emit a global array containing all external kernels or device variables // used by host functions and mark it as used for CUDA/HIP. This is necessary // to get kernels or device variables in archives linked in even if these // kernels or device variables are only used in host functions. if (!Context.CUDAExternalDeviceDeclODRUsedByHost.empty()) { SmallVector UsedArray; for (auto D : Context.CUDAExternalDeviceDeclODRUsedByHost) { GlobalDecl GD; if (auto *FD = dyn_cast(D)) GD = GlobalDecl(FD, KernelReferenceKind::Kernel); else GD = GlobalDecl(D); UsedArray.push_back(llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( GetAddrOfGlobal(GD), Int8PtrTy)); } llvm::ArrayType *ATy = llvm::ArrayType::get(Int8PtrTy, UsedArray.size()); auto *GV = new llvm::GlobalVariable( getModule(), ATy, false, llvm::GlobalValue::InternalLinkage, llvm::ConstantArray::get(ATy, UsedArray), "__clang_gpu_used_external"); addCompilerUsedGlobal(GV); } if (LangOpts.HIP && !getLangOpts().OffloadingNewDriver) { // Emit a unique ID so that host and device binaries from the same // compilation unit can be associated. auto *GV = new llvm::GlobalVariable( getModule(), Int8Ty, false, llvm::GlobalValue::ExternalLinkage, llvm::Constant::getNullValue(Int8Ty), "__hip_cuid_" + getContext().getCUIDHash()); addCompilerUsedGlobal(GV); } emitLLVMUsed(); if (SanStats) SanStats->finish(); if (CodeGenOpts.Autolink && (Context.getLangOpts().Modules || !LinkerOptionsMetadata.empty())) { EmitModuleLinkOptions(); } // On ELF we pass the dependent library specifiers directly to the linker // without manipulating them. This is in contrast to other platforms where // they are mapped to a specific linker option by the compiler. This // difference is a result of the greater variety of ELF linkers and the fact // that ELF linkers tend to handle libraries in a more complicated fashion // than on other platforms. This forces us to defer handling the dependent // libs to the linker. // // CUDA/HIP device and host libraries are different. Currently there is no // way to differentiate dependent libraries for host or device. Existing // usage of #pragma comment(lib, *) is intended for host libraries on // Windows. Therefore emit llvm.dependent-libraries only for host. if (!ELFDependentLibraries.empty() && !Context.getLangOpts().CUDAIsDevice) { auto *NMD = getModule().getOrInsertNamedMetadata("llvm.dependent-libraries"); for (auto *MD : ELFDependentLibraries) NMD->addOperand(MD); } if (CodeGenOpts.DwarfVersion) { getModule().addModuleFlag(llvm::Module::Max, "Dwarf Version", CodeGenOpts.DwarfVersion); } if (CodeGenOpts.Dwarf64) getModule().addModuleFlag(llvm::Module::Max, "DWARF64", 1); if (Context.getLangOpts().SemanticInterposition) // Require various optimization to respect semantic interposition. getModule().setSemanticInterposition(true); if (CodeGenOpts.EmitCodeView) { // Indicate that we want CodeView in the metadata. getModule().addModuleFlag(llvm::Module::Warning, "CodeView", 1); } if (CodeGenOpts.CodeViewGHash) { getModule().addModuleFlag(llvm::Module::Warning, "CodeViewGHash", 1); } if (CodeGenOpts.ControlFlowGuard) { // Function ID tables and checks for Control Flow Guard (cfguard=2). getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 2); } else if (CodeGenOpts.ControlFlowGuardNoChecks) { // Function ID tables for Control Flow Guard (cfguard=1). 
getModule().addModuleFlag(llvm::Module::Warning, "cfguard", 1); } if (CodeGenOpts.EHContGuard) { // Function ID tables for EH Continuation Guard. getModule().addModuleFlag(llvm::Module::Warning, "ehcontguard", 1); } if (Context.getLangOpts().Kernel) { // Note if we are compiling with /kernel. getModule().addModuleFlag(llvm::Module::Warning, "ms-kernel", 1); } if (CodeGenOpts.OptimizationLevel > 0 && CodeGenOpts.StrictVTablePointers) { // We don't support LTO with 2 with different StrictVTablePointers // FIXME: we could support it by stripping all the information introduced // by StrictVTablePointers. getModule().addModuleFlag(llvm::Module::Error, "StrictVTablePointers",1); llvm::Metadata *Ops[2] = { llvm::MDString::get(VMContext, "StrictVTablePointers"), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( llvm::Type::getInt32Ty(VMContext), 1))}; getModule().addModuleFlag(llvm::Module::Require, "StrictVTablePointersRequirement", llvm::MDNode::get(VMContext, Ops)); } if (getModuleDebugInfo()) // We support a single version in the linked module. The LLVM // parser will drop debug info with a different version number // (and warn about it, too). getModule().addModuleFlag(llvm::Module::Warning, "Debug Info Version", llvm::DEBUG_METADATA_VERSION); // We need to record the widths of enums and wchar_t, so that we can generate // the correct build attributes in the ARM backend. wchar_size is also used by // TargetLibraryInfo. uint64_t WCharWidth = Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity(); getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth); if (getTriple().isOSzOS()) { getModule().addModuleFlag(llvm::Module::Warning, "zos_product_major_version", uint32_t(CLANG_VERSION_MAJOR)); getModule().addModuleFlag(llvm::Module::Warning, "zos_product_minor_version", uint32_t(CLANG_VERSION_MINOR)); getModule().addModuleFlag(llvm::Module::Warning, "zos_product_patchlevel", uint32_t(CLANG_VERSION_PATCHLEVEL)); std::string ProductId = getClangVendor() + "clang"; getModule().addModuleFlag(llvm::Module::Error, "zos_product_id", llvm::MDString::get(VMContext, ProductId)); // Record the language because we need it for the PPA2. StringRef lang_str = languageToString( LangStandard::getLangStandardForKind(LangOpts.LangStd).Language); getModule().addModuleFlag(llvm::Module::Error, "zos_cu_language", llvm::MDString::get(VMContext, lang_str)); time_t TT = PreprocessorOpts.SourceDateEpoch ? *PreprocessorOpts.SourceDateEpoch : std::time(nullptr); getModule().addModuleFlag(llvm::Module::Max, "zos_translation_time", static_cast(TT)); // Multiple modes will be supported here. getModule().addModuleFlag(llvm::Module::Error, "zos_le_char_mode", llvm::MDString::get(VMContext, "ascii")); } llvm::Triple T = Context.getTargetInfo().getTriple(); if (T.isARM() || T.isThumb()) { // The minimum width of an enum in bytes uint64_t EnumWidth = Context.getLangOpts().ShortEnums ? 1 : 4; getModule().addModuleFlag(llvm::Module::Error, "min_enum_size", EnumWidth); } if (T.isRISCV()) { StringRef ABIStr = Target.getABI(); llvm::LLVMContext &Ctx = TheModule.getContext(); getModule().addModuleFlag(llvm::Module::Error, "target-abi", llvm::MDString::get(Ctx, ABIStr)); // Add the canonical ISA string as metadata so the backend can set the ELF // attributes correctly. We use AppendUnique so LTO will keep all of the // unique ISA strings that were linked together. const std::vector &Features = getTarget().getTargetOpts().Features; auto ParseResult = llvm::RISCVISAInfo::parseFeatures(T.isRISCV64() ? 
64 : 32, Features); if (!errorToBool(ParseResult.takeError())) getModule().addModuleFlag( llvm::Module::AppendUnique, "riscv-isa", llvm::MDNode::get( Ctx, llvm::MDString::get(Ctx, (*ParseResult)->toString()))); } if (CodeGenOpts.SanitizeCfiCrossDso) { // Indicate that we want cross-DSO control flow integrity checks. getModule().addModuleFlag(llvm::Module::Override, "Cross-DSO CFI", 1); } if (CodeGenOpts.WholeProgramVTables) { // Indicate whether VFE was enabled for this module, so that the // vcall_visibility metadata added under whole program vtables is handled // appropriately in the optimizer. getModule().addModuleFlag(llvm::Module::Error, "Virtual Function Elim", CodeGenOpts.VirtualFunctionElimination); } if (LangOpts.Sanitize.has(SanitizerKind::CFIICall)) { getModule().addModuleFlag(llvm::Module::Override, "CFI Canonical Jump Tables", CodeGenOpts.SanitizeCfiCanonicalJumpTables); } + if (CodeGenOpts.SanitizeCfiICallNormalizeIntegers) { + getModule().addModuleFlag(llvm::Module::Override, "cfi-normalize-integers", + 1); + } + if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) { getModule().addModuleFlag(llvm::Module::Override, "kcfi", 1); // KCFI assumes patchable-function-prefix is the same for all indirectly // called functions. Store the expected offset for code generation. if (CodeGenOpts.PatchableFunctionEntryOffset) getModule().addModuleFlag(llvm::Module::Override, "kcfi-offset", CodeGenOpts.PatchableFunctionEntryOffset); } if (CodeGenOpts.CFProtectionReturn && Target.checkCFProtectionReturnSupported(getDiags())) { // Indicate that we want to instrument return control flow protection. getModule().addModuleFlag(llvm::Module::Min, "cf-protection-return", 1); } if (CodeGenOpts.CFProtectionBranch && Target.checkCFProtectionBranchSupported(getDiags())) { // Indicate that we want to instrument branch control flow protection. getModule().addModuleFlag(llvm::Module::Min, "cf-protection-branch", 1); } if (CodeGenOpts.FunctionReturnThunks) getModule().addModuleFlag(llvm::Module::Override, "function_return_thunk_extern", 1); if (CodeGenOpts.IndirectBranchCSPrefix) getModule().addModuleFlag(llvm::Module::Override, "indirect_branch_cs_prefix", 1); // Add module metadata for return address signing (ignoring // non-leaf/all) and stack tagging. These are actually turned on by function // attributes, but we use module metadata to emit build attributes. This is // needed for LTO, where the function attributes are inside bitcode // serialised into a global variable by the time build attributes are // emitted, so we can't access them. LTO objects could be compiled with // different flags therefore module flags are set to "Min" behavior to achieve // the same end result of the normal build where e.g BTI is off if any object // doesn't support it. 
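  // Rough sketch of the result (assuming -mbranch-protection=bti on an AArch64
  // ELF target): !llvm.module.flags gains entries such as
  //
  //   !{i32 8, !"branch-target-enforcement", i32 1}
  //
  // where i32 8 is the "Min" merge behavior, so linking in one object built
  // without BTI lowers the merged value to 0, matching the description above.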
if (Context.getTargetInfo().hasFeature("ptrauth") && LangOpts.getSignReturnAddressScope() != LangOptions::SignReturnAddressScopeKind::None) getModule().addModuleFlag(llvm::Module::Override, "sign-return-address-buildattr", 1); if (LangOpts.Sanitize.has(SanitizerKind::MemtagStack)) getModule().addModuleFlag(llvm::Module::Override, "tag-stack-memory-buildattr", 1); if (T.isARM() || T.isThumb() || T.isAArch64()) { if (LangOpts.BranchTargetEnforcement) getModule().addModuleFlag(llvm::Module::Min, "branch-target-enforcement", 1); if (LangOpts.BranchProtectionPAuthLR) getModule().addModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 1); if (LangOpts.GuardedControlStack) getModule().addModuleFlag(llvm::Module::Min, "guarded-control-stack", 1); if (LangOpts.hasSignReturnAddress()) getModule().addModuleFlag(llvm::Module::Min, "sign-return-address", 1); if (LangOpts.isSignReturnAddressScopeAll()) getModule().addModuleFlag(llvm::Module::Min, "sign-return-address-all", 1); if (!LangOpts.isSignReturnAddressWithAKey()) getModule().addModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 1); if (getTriple().isOSLinux()) { assert(getTriple().isOSBinFormatELF()); using namespace llvm::ELF; uint64_t PAuthABIVersion = (LangOpts.PointerAuthIntrinsics << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INTRINSICS) | (LangOpts.PointerAuthCalls << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_CALLS) | (LangOpts.PointerAuthReturns << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_RETURNS) | (LangOpts.PointerAuthAuthTraps << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_AUTHTRAPS) | (LangOpts.PointerAuthVTPtrAddressDiscrimination << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRADDRDISCR) | (LangOpts.PointerAuthVTPtrTypeDiscrimination << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRTYPEDISCR) | (LangOpts.PointerAuthInitFini << AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI); static_assert(AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI == AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_LAST, "Update when new enum items are defined"); if (PAuthABIVersion != 0) { getModule().addModuleFlag(llvm::Module::Error, "aarch64-elf-pauthabi-platform", AARCH64_PAUTH_PLATFORM_LLVM_LINUX); getModule().addModuleFlag(llvm::Module::Error, "aarch64-elf-pauthabi-version", PAuthABIVersion); } } } if (CodeGenOpts.StackClashProtector) getModule().addModuleFlag( llvm::Module::Override, "probe-stack", llvm::MDString::get(TheModule.getContext(), "inline-asm")); if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096) getModule().addModuleFlag(llvm::Module::Min, "stack-probe-size", CodeGenOpts.StackProbeSize); if (!CodeGenOpts.MemoryProfileOutput.empty()) { llvm::LLVMContext &Ctx = TheModule.getContext(); getModule().addModuleFlag( llvm::Module::Error, "MemProfProfileFilename", llvm::MDString::get(Ctx, CodeGenOpts.MemoryProfileOutput)); } if (LangOpts.CUDAIsDevice && getTriple().isNVPTX()) { // Indicate whether __nvvm_reflect should be configured to flush denormal // floating point values to 0. (This corresponds to its "__CUDA_FTZ" // property.) 
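  // Illustrative note: the NVVMReflect pass later folds device-side calls such
  // as __nvvm_reflect("__CUDA_FTZ") to the value recorded in this module flag,
  // which is how the FTZ choice reaches the generated PTX.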
getModule().addModuleFlag(llvm::Module::Override, "nvvm-reflect-ftz", CodeGenOpts.FP32DenormalMode.Output != llvm::DenormalMode::IEEE); } if (LangOpts.EHAsynch) getModule().addModuleFlag(llvm::Module::Warning, "eh-asynch", 1); // Indicate whether this Module was compiled with -fopenmp if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd) getModule().addModuleFlag(llvm::Module::Max, "openmp", LangOpts.OpenMP); if (getLangOpts().OpenMPIsTargetDevice) getModule().addModuleFlag(llvm::Module::Max, "openmp-device", LangOpts.OpenMP); // Emit OpenCL specific module metadata: OpenCL/SPIR version. if (LangOpts.OpenCL || (LangOpts.CUDAIsDevice && getTriple().isSPIRV())) { EmitOpenCLMetadata(); // Emit SPIR version. if (getTriple().isSPIR()) { // SPIR v2.0 s2.12 - The SPIR version used by the module is stored in the // opencl.spir.version named metadata. // C++ for OpenCL has a distinct mapping for version compatibility with // OpenCL. auto Version = LangOpts.getOpenCLCompatibleVersion(); llvm::Metadata *SPIRVerElts[] = { llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, Version / 100)), llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( Int32Ty, (Version / 100 > 1) ? 0 : 2))}; llvm::NamedMDNode *SPIRVerMD = TheModule.getOrInsertNamedMetadata("opencl.spir.version"); llvm::LLVMContext &Ctx = TheModule.getContext(); SPIRVerMD->addOperand(llvm::MDNode::get(Ctx, SPIRVerElts)); } } // HLSL related end of code gen work items. if (LangOpts.HLSL) getHLSLRuntime().finishCodeGen(); if (uint32_t PLevel = Context.getLangOpts().PICLevel) { assert(PLevel < 3 && "Invalid PIC Level"); getModule().setPICLevel(static_cast(PLevel)); if (Context.getLangOpts().PIE) getModule().setPIELevel(static_cast(PLevel)); } if (getCodeGenOpts().CodeModel.size() > 0) { unsigned CM = llvm::StringSwitch(getCodeGenOpts().CodeModel) .Case("tiny", llvm::CodeModel::Tiny) .Case("small", llvm::CodeModel::Small) .Case("kernel", llvm::CodeModel::Kernel) .Case("medium", llvm::CodeModel::Medium) .Case("large", llvm::CodeModel::Large) .Default(~0u); if (CM != ~0u) { llvm::CodeModel::Model codeModel = static_cast(CM); getModule().setCodeModel(codeModel); if ((CM == llvm::CodeModel::Medium || CM == llvm::CodeModel::Large) && Context.getTargetInfo().getTriple().getArch() == llvm::Triple::x86_64) { getModule().setLargeDataThreshold(getCodeGenOpts().LargeDataThreshold); } } } if (CodeGenOpts.NoPLT) getModule().setRtLibUseGOT(); if (getTriple().isOSBinFormatELF() && CodeGenOpts.DirectAccessExternalData != getModule().getDirectAccessExternalData()) { getModule().setDirectAccessExternalData( CodeGenOpts.DirectAccessExternalData); } if (CodeGenOpts.UnwindTables) getModule().setUwtable(llvm::UWTableKind(CodeGenOpts.UnwindTables)); switch (CodeGenOpts.getFramePointer()) { case CodeGenOptions::FramePointerKind::None: // 0 ("none") is the default. 
break; case CodeGenOptions::FramePointerKind::Reserved: getModule().setFramePointer(llvm::FramePointerKind::Reserved); break; case CodeGenOptions::FramePointerKind::NonLeaf: getModule().setFramePointer(llvm::FramePointerKind::NonLeaf); break; case CodeGenOptions::FramePointerKind::All: getModule().setFramePointer(llvm::FramePointerKind::All); break; } SimplifyPersonality(); if (getCodeGenOpts().EmitDeclMetadata) EmitDeclMetadata(); if (getCodeGenOpts().CoverageNotesFile.size() || getCodeGenOpts().CoverageDataFile.size()) EmitCoverageFile(); if (CGDebugInfo *DI = getModuleDebugInfo()) DI->finalize(); if (getCodeGenOpts().EmitVersionIdentMetadata) EmitVersionIdentMetadata(); if (!getCodeGenOpts().RecordCommandLine.empty()) EmitCommandLineMetadata(); if (!getCodeGenOpts().StackProtectorGuard.empty()) getModule().setStackProtectorGuard(getCodeGenOpts().StackProtectorGuard); if (!getCodeGenOpts().StackProtectorGuardReg.empty()) getModule().setStackProtectorGuardReg( getCodeGenOpts().StackProtectorGuardReg); if (!getCodeGenOpts().StackProtectorGuardSymbol.empty()) getModule().setStackProtectorGuardSymbol( getCodeGenOpts().StackProtectorGuardSymbol); if (getCodeGenOpts().StackProtectorGuardOffset != INT_MAX) getModule().setStackProtectorGuardOffset( getCodeGenOpts().StackProtectorGuardOffset); if (getCodeGenOpts().StackAlignment) getModule().setOverrideStackAlignment(getCodeGenOpts().StackAlignment); if (getCodeGenOpts().SkipRaxSetup) getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1); if (getLangOpts().RegCall4) getModule().addModuleFlag(llvm::Module::Override, "RegCallv4", 1); if (getContext().getTargetInfo().getMaxTLSAlign()) getModule().addModuleFlag(llvm::Module::Error, "MaxTLSAlign", getContext().getTargetInfo().getMaxTLSAlign()); getTargetCodeGenInfo().emitTargetGlobals(*this); getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); EmitBackendOptionsMetadata(getCodeGenOpts()); // If there is device offloading code embed it in the host now. EmbedObject(&getModule(), CodeGenOpts, getDiags()); // Set visibility from DLL storage class // We do this at the end of LLVM IR generation; after any operation // that might affect the DLL storage class or the visibility, and // before anything that might act on these. setVisibilityFromDLLStorageClass(LangOpts, getModule()); // Check the tail call symbols are truly undefined. if (getTriple().isPPC() && !MustTailCallUndefinedGlobals.empty()) { for (auto &I : MustTailCallUndefinedGlobals) { if (!I.first->isDefined()) getDiags().Report(I.second, diag::err_ppc_impossible_musttail) << 2; else { StringRef MangledName = getMangledName(GlobalDecl(I.first)); llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (!Entry || Entry->isWeakForLinker() || Entry->isDeclarationForLinker()) getDiags().Report(I.second, diag::err_ppc_impossible_musttail) << 2; } } } } void CodeGenModule::EmitOpenCLMetadata() { // SPIR v2.0 s2.13 - The OpenCL version used by the module is stored in the // opencl.ocl.version named metadata node. // C++ for OpenCL has a distinct mapping for versions compatible with OpenCL. 
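  // Illustrative result (assuming -cl-std=CL3.0): the lambda below emits
  //
  //   !opencl.ocl.version = !{!0}
  //   !0 = !{i32 3, i32 0}
  //
  // i.e. major = Version / 100 and minor = (Version % 100) / 10.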
auto CLVersion = LangOpts.getOpenCLCompatibleVersion(); auto EmitVersion = [this](StringRef MDName, int Version) { llvm::Metadata *OCLVerElts[] = { llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(Int32Ty, Version / 100)), llvm::ConstantAsMetadata::get( llvm::ConstantInt::get(Int32Ty, (Version % 100) / 10))}; llvm::NamedMDNode *OCLVerMD = TheModule.getOrInsertNamedMetadata(MDName); llvm::LLVMContext &Ctx = TheModule.getContext(); OCLVerMD->addOperand(llvm::MDNode::get(Ctx, OCLVerElts)); }; EmitVersion("opencl.ocl.version", CLVersion); if (LangOpts.OpenCLCPlusPlus) { // In addition to the OpenCL compatible version, emit the C++ version. EmitVersion("opencl.cxx.version", LangOpts.OpenCLCPlusPlusVersion); } } void CodeGenModule::EmitBackendOptionsMetadata( const CodeGenOptions &CodeGenOpts) { if (getTriple().isRISCV()) { getModule().addModuleFlag(llvm::Module::Min, "SmallDataLimit", CodeGenOpts.SmallDataLimit); } } void CodeGenModule::UpdateCompletedType(const TagDecl *TD) { // Make sure that this type is translated. getTypes().UpdateCompletedType(TD); } void CodeGenModule::RefreshTypeCacheForClass(const CXXRecordDecl *RD) { // Make sure that this type is translated. getTypes().RefreshTypeCacheForClass(RD); } llvm::MDNode *CodeGenModule::getTBAATypeInfo(QualType QTy) { if (!TBAA) return nullptr; return TBAA->getTypeInfo(QTy); } TBAAAccessInfo CodeGenModule::getTBAAAccessInfo(QualType AccessType) { if (!TBAA) return TBAAAccessInfo(); if (getLangOpts().CUDAIsDevice) { // As CUDA builtin surface/texture types are replaced, skip generating TBAA // access info. if (AccessType->isCUDADeviceBuiltinSurfaceType()) { if (getTargetCodeGenInfo().getCUDADeviceBuiltinSurfaceDeviceType() != nullptr) return TBAAAccessInfo(); } else if (AccessType->isCUDADeviceBuiltinTextureType()) { if (getTargetCodeGenInfo().getCUDADeviceBuiltinTextureDeviceType() != nullptr) return TBAAAccessInfo(); } } return TBAA->getAccessInfo(AccessType); } TBAAAccessInfo CodeGenModule::getTBAAVTablePtrAccessInfo(llvm::Type *VTablePtrType) { if (!TBAA) return TBAAAccessInfo(); return TBAA->getVTablePtrAccessInfo(VTablePtrType); } llvm::MDNode *CodeGenModule::getTBAAStructInfo(QualType QTy) { if (!TBAA) return nullptr; return TBAA->getTBAAStructInfo(QTy); } llvm::MDNode *CodeGenModule::getTBAABaseTypeInfo(QualType QTy) { if (!TBAA) return nullptr; return TBAA->getBaseTypeInfo(QTy); } llvm::MDNode *CodeGenModule::getTBAAAccessTagInfo(TBAAAccessInfo Info) { if (!TBAA) return nullptr; return TBAA->getAccessTagInfo(Info); } TBAAAccessInfo CodeGenModule::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, TBAAAccessInfo TargetInfo) { if (!TBAA) return TBAAAccessInfo(); return TBAA->mergeTBAAInfoForCast(SourceInfo, TargetInfo); } TBAAAccessInfo CodeGenModule::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, TBAAAccessInfo InfoB) { if (!TBAA) return TBAAAccessInfo(); return TBAA->mergeTBAAInfoForConditionalOperator(InfoA, InfoB); } TBAAAccessInfo CodeGenModule::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo, TBAAAccessInfo SrcInfo) { if (!TBAA) return TBAAAccessInfo(); return TBAA->mergeTBAAInfoForConditionalOperator(DestInfo, SrcInfo); } void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst, TBAAAccessInfo TBAAInfo) { if (llvm::MDNode *Tag = getTBAAAccessTagInfo(TBAAInfo)) Inst->setMetadata(llvm::LLVMContext::MD_tbaa, Tag); } void CodeGenModule::DecorateInstructionWithInvariantGroup( llvm::Instruction *I, const CXXRecordDecl *RD) { I->setMetadata(llvm::LLVMContext::MD_invariant_group, 
llvm::MDNode::get(getLLVMContext(), {})); } void CodeGenModule::Error(SourceLocation loc, StringRef message) { unsigned diagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, "%0"); getDiags().Report(Context.getFullLoc(loc), diagID) << message; } /// ErrorUnsupported - Print out an error that codegen doesn't support the /// specified stmt yet. void CodeGenModule::ErrorUnsupported(const Stmt *S, const char *Type) { unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, "cannot compile this %0 yet"); std::string Msg = Type; getDiags().Report(Context.getFullLoc(S->getBeginLoc()), DiagID) << Msg << S->getSourceRange(); } /// ErrorUnsupported - Print out an error that codegen doesn't support the /// specified decl yet. void CodeGenModule::ErrorUnsupported(const Decl *D, const char *Type) { unsigned DiagID = getDiags().getCustomDiagID(DiagnosticsEngine::Error, "cannot compile this %0 yet"); std::string Msg = Type; getDiags().Report(Context.getFullLoc(D->getLocation()), DiagID) << Msg; } llvm::ConstantInt *CodeGenModule::getSize(CharUnits size) { return llvm::ConstantInt::get(SizeTy, size.getQuantity()); } void CodeGenModule::setGlobalVisibility(llvm::GlobalValue *GV, const NamedDecl *D) const { // Internal definitions always have default visibility. if (GV->hasLocalLinkage()) { GV->setVisibility(llvm::GlobalValue::DefaultVisibility); return; } if (!D) return; // Set visibility for definitions, and for declarations if requested globally // or set explicitly. LinkageInfo LV = D->getLinkageAndVisibility(); // OpenMP declare target variables must be visible to the host so they can // be registered. We require protected visibility unless the variable has // the DT_nohost modifier and does not need to be registered. if (Context.getLangOpts().OpenMP && Context.getLangOpts().OpenMPIsTargetDevice && isa(D) && D->hasAttr() && D->getAttr()->getDevType() != OMPDeclareTargetDeclAttr::DT_NoHost && LV.getVisibility() == HiddenVisibility) { GV->setVisibility(llvm::GlobalValue::ProtectedVisibility); return; } if (GV->hasDLLExportStorageClass() || GV->hasDLLImportStorageClass()) { // Reject incompatible dlllstorage and visibility annotations. if (!LV.isVisibilityExplicit()) return; if (GV->hasDLLExportStorageClass()) { if (LV.getVisibility() == HiddenVisibility) getDiags().Report(D->getLocation(), diag::err_hidden_visibility_dllexport); } else if (LV.getVisibility() != DefaultVisibility) { getDiags().Report(D->getLocation(), diag::err_non_default_visibility_dllimport); } return; } if (LV.isVisibilityExplicit() || getLangOpts().SetVisibilityForExternDecls || !GV->isDeclarationForLinker()) GV->setVisibility(GetLLVMVisibility(LV.getVisibility())); } static bool shouldAssumeDSOLocal(const CodeGenModule &CGM, llvm::GlobalValue *GV) { if (GV->hasLocalLinkage()) return true; if (!GV->hasDefaultVisibility() && !GV->hasExternalWeakLinkage()) return true; // DLLImport explicitly marks the GV as external. if (GV->hasDLLImportStorageClass()) return false; const llvm::Triple &TT = CGM.getTriple(); const auto &CGOpts = CGM.getCodeGenOpts(); if (TT.isWindowsGNUEnvironment()) { // In MinGW, variables without DLLImport can still be automatically // imported from a DLL by the linker; don't mark variables that // potentially could come from another DLL as DSO local. // With EmulatedTLS, TLS variables can be autoimported from other DLLs // (and this actually happens in the public interface of libstdc++), so // such variables can't be marked as DSO local. 
(Native TLS variables // can't be dllimported at all, though.) if (GV->isDeclarationForLinker() && isa(GV) && (!GV->isThreadLocal() || CGM.getCodeGenOpts().EmulatedTLS) && CGOpts.AutoImport) return false; } // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols // remain unresolved in the link, they can be resolved to zero, which is // outside the current DSO. if (TT.isOSBinFormatCOFF() && GV->hasExternalWeakLinkage()) return false; // Every other GV is local on COFF. // Make an exception for windows OS in the triple: Some firmware builds use // *-win32-macho triples. This (accidentally?) produced windows relocations // without GOT tables in older clang versions; Keep this behaviour. // FIXME: even thread local variables? if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO())) return true; // Only handle COFF and ELF for now. if (!TT.isOSBinFormatELF()) return false; // If this is not an executable, don't assume anything is local. llvm::Reloc::Model RM = CGOpts.RelocationModel; const auto &LOpts = CGM.getLangOpts(); if (RM != llvm::Reloc::Static && !LOpts.PIE) { // On ELF, if -fno-semantic-interposition is specified and the target // supports local aliases, there will be neither CC1 // -fsemantic-interposition nor -fhalf-no-semantic-interposition. Set // dso_local on the function if using a local alias is preferable (can avoid // PLT indirection). if (!(isa(GV) && GV->canBenefitFromLocalAlias())) return false; return !(CGM.getLangOpts().SemanticInterposition || CGM.getLangOpts().HalfNoSemanticInterposition); } // A definition cannot be preempted from an executable. if (!GV->isDeclarationForLinker()) return true; // Most PIC code sequences that assume that a symbol is local cannot produce a // 0 if it turns out the symbol is undefined. While this is ABI and relocation // depended, it seems worth it to handle it here. if (RM == llvm::Reloc::PIC_ && GV->hasExternalWeakLinkage()) return false; // PowerPC64 prefers TOC indirection to avoid copy relocations. if (TT.isPPC64()) return false; if (CGOpts.DirectAccessExternalData) { // If -fdirect-access-external-data (default for -fno-pic), set dso_local // for non-thread-local variables. If the symbol is not defined in the // executable, a copy relocation will be needed at link time. dso_local is // excluded for thread-local variables because they generally don't support // copy relocations. if (auto *Var = dyn_cast(GV)) if (!Var->isThreadLocal()) return true; // -fno-pic sets dso_local on a function declaration to allow direct // accesses when taking its address (similar to a data symbol). If the // function is not defined in the executable, a canonical PLT entry will be // needed at link time. -fno-direct-access-external-data can avoid the // canonical PLT entry. We don't generalize this condition to -fpie/-fpic as // it could just cause trouble without providing perceptible benefits. if (isa(GV) && !CGOpts.NoPLT && RM == llvm::Reloc::Static) return true; } // If we can use copy relocations we can assume it is local. // Otherwise don't assume it is local. return false; } void CodeGenModule::setDSOLocal(llvm::GlobalValue *GV) const { GV->setDSOLocal(shouldAssumeDSOLocal(*this, GV)); } void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV, GlobalDecl GD) const { const auto *D = dyn_cast(GD.getDecl()); // C++ destructors have a few C++ ABI specific special cases. 
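  // Illustrative note: destructor variants (base, complete, and in the
  // Microsoft ABI the deleting destructor) are an ABI detail, so their DLL
  // storage is delegated to the CXXABI object rather than the generic
  // NamedDecl path below.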
if (const auto *Dtor = dyn_cast_or_null(D)) { getCXXABI().setCXXDestructorDLLStorage(GV, Dtor, GD.getDtorType()); return; } setDLLImportDLLExport(GV, D); } void CodeGenModule::setDLLImportDLLExport(llvm::GlobalValue *GV, const NamedDecl *D) const { if (D && D->isExternallyVisible()) { if (D->hasAttr()) GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass); else if ((D->hasAttr() || shouldMapVisibilityToDLLExport(D)) && !GV->isDeclarationForLinker()) GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass); } } void CodeGenModule::setGVProperties(llvm::GlobalValue *GV, GlobalDecl GD) const { setDLLImportDLLExport(GV, GD); setGVPropertiesAux(GV, dyn_cast(GD.getDecl())); } void CodeGenModule::setGVProperties(llvm::GlobalValue *GV, const NamedDecl *D) const { setDLLImportDLLExport(GV, D); setGVPropertiesAux(GV, D); } void CodeGenModule::setGVPropertiesAux(llvm::GlobalValue *GV, const NamedDecl *D) const { setGlobalVisibility(GV, D); setDSOLocal(GV); GV->setPartition(CodeGenOpts.SymbolPartition); } static llvm::GlobalVariable::ThreadLocalMode GetLLVMTLSModel(StringRef S) { return llvm::StringSwitch(S) .Case("global-dynamic", llvm::GlobalVariable::GeneralDynamicTLSModel) .Case("local-dynamic", llvm::GlobalVariable::LocalDynamicTLSModel) .Case("initial-exec", llvm::GlobalVariable::InitialExecTLSModel) .Case("local-exec", llvm::GlobalVariable::LocalExecTLSModel); } llvm::GlobalVariable::ThreadLocalMode CodeGenModule::GetDefaultLLVMTLSModel() const { switch (CodeGenOpts.getDefaultTLSModel()) { case CodeGenOptions::GeneralDynamicTLSModel: return llvm::GlobalVariable::GeneralDynamicTLSModel; case CodeGenOptions::LocalDynamicTLSModel: return llvm::GlobalVariable::LocalDynamicTLSModel; case CodeGenOptions::InitialExecTLSModel: return llvm::GlobalVariable::InitialExecTLSModel; case CodeGenOptions::LocalExecTLSModel: return llvm::GlobalVariable::LocalExecTLSModel; } llvm_unreachable("Invalid TLS model!"); } void CodeGenModule::setTLSMode(llvm::GlobalValue *GV, const VarDecl &D) const { assert(D.getTLSKind() && "setting TLS mode on non-TLS var!"); llvm::GlobalValue::ThreadLocalMode TLM; TLM = GetDefaultLLVMTLSModel(); // Override the TLS model if it is explicitly specified. if (const TLSModelAttr *Attr = D.getAttr()) { TLM = GetLLVMTLSModel(Attr->getModel()); } GV->setThreadLocalMode(TLM); } static std::string getCPUSpecificMangling(const CodeGenModule &CGM, StringRef Name) { const TargetInfo &Target = CGM.getTarget(); return (Twine('.') + Twine(Target.CPUSpecificManglingCharacter(Name))).str(); } static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM, const CPUSpecificAttr *Attr, unsigned CPUIndex, raw_ostream &Out) { // cpu_specific gets the current name, dispatch gets the resolver if IFunc is // supported. if (Attr) Out << getCPUSpecificMangling(CGM, Attr->getCPUName(CPUIndex)->getName()); else if (CGM.getTarget().supportsIFunc()) Out << ".resolver"; } // Returns true if GD is a function decl with internal linkage and // needs a unique suffix after the mangled name. 
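// Illustrative example (assuming -funique-internal-linkage-names): an
// internal-linkage function such as
//
//   static int helper() { return 42; }
//
// keeps its usual mangling but gains a suffix of the form ".__uniq.<hash>"
// derived from the source-path hash computed in the constructor, so identical
// static names from different TUs stay distinguishable in profiles.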
static bool isUniqueInternalLinkageDecl(GlobalDecl GD, CodeGenModule &CGM) { const Decl *D = GD.getDecl(); return !CGM.getModuleNameHash().empty() && isa(D) && (CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage); } static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, const NamedDecl *ND, bool OmitMultiVersionMangling = false) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); MangleContext &MC = CGM.getCXXABI().getMangleContext(); if (!CGM.getModuleNameHash().empty()) MC.needsUniqueInternalLinkageNames(); bool ShouldMangle = MC.shouldMangleDeclName(ND); if (ShouldMangle) MC.mangleName(GD.getWithDecl(ND), Out); else { IdentifierInfo *II = ND->getIdentifier(); assert(II && "Attempt to mangle unnamed decl."); const auto *FD = dyn_cast(ND); if (FD && FD->getType()->castAs()->getCallConv() == CC_X86RegCall) { if (CGM.getLangOpts().RegCall4) Out << "__regcall4__" << II->getName(); else Out << "__regcall3__" << II->getName(); } else if (FD && FD->hasAttr() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__device_stub__" << II->getName(); } else { Out << II->getName(); } } // Check if the module name hash should be appended for internal linkage // symbols. This should come before multi-version target suffixes are // appended. This is to keep the name and module hash suffix of the // internal linkage function together. The unique suffix should only be // added when name mangling is done to make sure that the final name can // be properly demangled. For example, for C functions without prototypes, // name mangling is not done and the unique suffix should not be appeneded // then. if (ShouldMangle && isUniqueInternalLinkageDecl(GD, CGM)) { assert(CGM.getCodeGenOpts().UniqueInternalLinkageNames && "Hash computed when not explicitly requested"); Out << CGM.getModuleNameHash(); } if (const auto *FD = dyn_cast(ND)) if (FD->isMultiVersion() && !OmitMultiVersionMangling) { switch (FD->getMultiVersionKind()) { case MultiVersionKind::CPUDispatch: case MultiVersionKind::CPUSpecific: AppendCPUSpecificCPUDispatchMangling(CGM, FD->getAttr(), GD.getMultiVersionIndex(), Out); break; case MultiVersionKind::Target: { auto *Attr = FD->getAttr(); assert(Attr && "Expected TargetAttr to be present " "for attribute mangling"); const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo(); Info.appendAttributeMangling(Attr, Out); break; } case MultiVersionKind::TargetVersion: { auto *Attr = FD->getAttr(); assert(Attr && "Expected TargetVersionAttr to be present " "for attribute mangling"); const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo(); Info.appendAttributeMangling(Attr, Out); break; } case MultiVersionKind::TargetClones: { auto *Attr = FD->getAttr(); assert(Attr && "Expected TargetClonesAttr to be present " "for attribute mangling"); unsigned Index = GD.getMultiVersionIndex(); const ABIInfo &Info = CGM.getTargetCodeGenInfo().getABIInfo(); Info.appendAttributeMangling(Attr, Index, Out); break; } case MultiVersionKind::None: llvm_unreachable("None multiversion type isn't valid here"); } } // Make unique name for device side static file-scope variable for HIP. 
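  // Illustrative note (assuming the current CUID scheme): the postfix printed
  // below is derived from the compilation-unit ID hash (the same hash used for
  // the "__hip_cuid_" global above), so the host and device compilations of a
  // TU agree on the externalized name of a static device variable.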
  if (CGM.getContext().shouldExternalize(ND) &&
      CGM.getLangOpts().GPURelocatableDeviceCode &&
      CGM.getLangOpts().CUDAIsDevice)
    CGM.printPostfixForExternalizedDecl(Out, ND);

  return std::string(Out.str());
}

void CodeGenModule::UpdateMultiVersionNames(GlobalDecl GD,
                                            const FunctionDecl *FD,
                                            StringRef &CurName) {
  if (!FD->isMultiVersion())
    return;

  // Get the name of what this would be without the 'target' attribute. This
  // allows us to look up the version that was emitted when this wasn't a
  // multiversion function.
  std::string NonTargetName =
      getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true);
  GlobalDecl OtherGD;
  if (lookupRepresentativeDecl(NonTargetName, OtherGD)) {
    assert(OtherGD.getCanonicalDecl()
               .getDecl()
               ->getAsFunction()
               ->isMultiVersion() &&
           "Other GD should now be a multiversioned function");
    // OtherFD is the version of this function that was mangled BEFORE
    // becoming a MultiVersion function. It potentially needs to be updated.
    const FunctionDecl *OtherFD = OtherGD.getCanonicalDecl()
                                      .getDecl()
                                      ->getAsFunction()
                                      ->getMostRecentDecl();
    std::string OtherName = getMangledNameImpl(*this, OtherGD, OtherFD);
    // This is so that if the initial version was already the 'default'
    // version, we don't try to update it.
    if (OtherName != NonTargetName) {
      // Remove instead of erase, since others may have stored the StringRef
      // to this.
      const auto ExistingRecord = Manglings.find(NonTargetName);
      if (ExistingRecord != std::end(Manglings))
        Manglings.remove(&(*ExistingRecord));
      auto Result = Manglings.insert(std::make_pair(OtherName, OtherGD));
      StringRef OtherNameRef = MangledDeclNames[OtherGD.getCanonicalDecl()] =
          Result.first->first();
      // If the current decl is being created, make sure we update the name.
      if (GD.getCanonicalDecl() == OtherGD.getCanonicalDecl())
        CurName = OtherNameRef;
      if (llvm::GlobalValue *Entry = GetGlobalValue(NonTargetName))
        Entry->setName(OtherName);
    }
  }
}

StringRef CodeGenModule::getMangledName(GlobalDecl GD) {
  GlobalDecl CanonicalGD = GD.getCanonicalDecl();

  // Some ABIs don't have constructor variants. Make sure that base and
  // complete constructors get mangled the same.
  if (const auto *CD = dyn_cast<CXXConstructorDecl>(CanonicalGD.getDecl())) {
    if (!getTarget().getCXXABI().hasConstructorVariants()) {
      CXXCtorType OrigCtorType = GD.getCtorType();
      assert(OrigCtorType == Ctor_Base || OrigCtorType == Ctor_Complete);
      if (OrigCtorType == Ctor_Base)
        CanonicalGD = GlobalDecl(CD, Ctor_Complete);
    }
  }

  // In CUDA/HIP device compilation with -fgpu-rdc, the mangled name of a
  // static device variable depends on whether the variable is referenced by
  // a host or device function. Therefore the mangled name cannot be cached.
  if (!LangOpts.CUDAIsDevice || !getContext().mayExternalize(GD.getDecl())) {
    auto FoundName = MangledDeclNames.find(CanonicalGD);
    if (FoundName != MangledDeclNames.end())
      return FoundName->second;
  }

  // Keep the first result in the case of a mangling collision.
  const auto *ND = cast<NamedDecl>(GD.getDecl());
  std::string MangledName = getMangledNameImpl(*this, GD, ND);

  // Ensure either we have different ABIs between host and device compilations,
  // say, host compilation follows the MSVC ABI while device compilation follows
  // the Itanium C++ ABI, or, if they follow the same ABI, kernel names after
  // mangling should be the same after name stubbing. The latter check is
  // very important as the device kernel name being mangled in host compilation
  // is used to resolve the device binaries to be executed. Inconsistent naming
  // results in undefined behavior.
Even though we cannot check that naming // directly between host- and device-compilations, the host- and // device-mangling in host compilation could help catching certain ones. assert(!isa(ND) || !ND->hasAttr() || getContext().shouldExternalize(ND) || getLangOpts().CUDAIsDevice || (getContext().getAuxTargetInfo() && (getContext().getAuxTargetInfo()->getCXXABI() != getContext().getTargetInfo().getCXXABI())) || getCUDARuntime().getDeviceSideName(ND) == getMangledNameImpl( *this, GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel), ND)); auto Result = Manglings.insert(std::make_pair(MangledName, GD)); return MangledDeclNames[CanonicalGD] = Result.first->first(); } StringRef CodeGenModule::getBlockMangledName(GlobalDecl GD, const BlockDecl *BD) { MangleContext &MangleCtx = getCXXABI().getMangleContext(); const Decl *D = GD.getDecl(); SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); if (!D) MangleCtx.mangleGlobalBlock(BD, dyn_cast_or_null(initializedGlobalDecl.getDecl()), Out); else if (const auto *CD = dyn_cast(D)) MangleCtx.mangleCtorBlock(CD, GD.getCtorType(), BD, Out); else if (const auto *DD = dyn_cast(D)) MangleCtx.mangleDtorBlock(DD, GD.getDtorType(), BD, Out); else MangleCtx.mangleBlock(cast(D), BD, Out); auto Result = Manglings.insert(std::make_pair(Out.str(), BD)); return Result.first->first(); } const GlobalDecl CodeGenModule::getMangledNameDecl(StringRef Name) { auto it = MangledDeclNames.begin(); while (it != MangledDeclNames.end()) { if (it->second == Name) return it->first; it++; } return GlobalDecl(); } llvm::GlobalValue *CodeGenModule::GetGlobalValue(StringRef Name) { return getModule().getNamedValue(Name); } /// AddGlobalCtor - Add a function to the list that will be called before /// main() runs. void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority, unsigned LexOrder, llvm::Constant *AssociatedData) { // FIXME: Type coercion of void()* types. GlobalCtors.push_back(Structor(Priority, LexOrder, Ctor, AssociatedData)); } /// AddGlobalDtor - Add a function to the list that will be called /// when the module is unloaded. void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority, bool IsDtorAttrFunc) { if (CodeGenOpts.RegisterGlobalDtorsWithAtExit && (!getContext().getTargetInfo().getTriple().isOSAIX() || IsDtorAttrFunc)) { DtorsUsingAtExit[Priority].push_back(Dtor); return; } // FIXME: Type coercion of void()* types. GlobalDtors.push_back(Structor(Priority, ~0U, Dtor, nullptr)); } void CodeGenModule::EmitCtorList(CtorList &Fns, const char *GlobalName) { if (Fns.empty()) return; // Ctor function type is void()*. llvm::FunctionType* CtorFTy = llvm::FunctionType::get(VoidTy, false); llvm::Type *CtorPFTy = llvm::PointerType::get(CtorFTy, TheModule.getDataLayout().getProgramAddressSpace()); // Get the type of a ctor entry, { i32, void ()*, i8* }. llvm::StructType *CtorStructTy = llvm::StructType::get( Int32Ty, CtorPFTy, VoidPtrTy); // Construct the constructor and destructor arrays. ConstantInitBuilder builder(*this); auto ctors = builder.beginArray(CtorStructTy); for (const auto &I : Fns) { auto ctor = ctors.beginStruct(CtorStructTy); ctor.addInt(Int32Ty, I.Priority); ctor.add(I.Initializer); if (I.AssociatedData) ctor.add(I.AssociatedData); else ctor.addNullPointer(VoidPtrTy); ctor.finishAndAddTo(ctors); } auto list = ctors.finishAndCreateGlobal(GlobalName, getPointerAlign(), /*constant*/ false, llvm::GlobalValue::AppendingLinkage); // The LTO linker doesn't seem to like it when we set an alignment // on appending variables. 
Take it off as a workaround. list->setAlignment(std::nullopt); Fns.clear(); } llvm::GlobalValue::LinkageTypes CodeGenModule::getFunctionLinkage(GlobalDecl GD) { const auto *D = cast(GD.getDecl()); GVALinkage Linkage = getContext().GetGVALinkageForFunction(D); if (const auto *Dtor = dyn_cast(D)) return getCXXABI().getCXXDestructorLinkage(Linkage, Dtor, GD.getDtorType()); return getLLVMLinkageForDeclarator(D, Linkage); } llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) { llvm::MDString *MDS = dyn_cast(MD); if (!MDS) return nullptr; return llvm::ConstantInt::get(Int64Ty, llvm::MD5Hash(MDS->getString())); } llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) { if (auto *FnType = T->getAs()) T = getContext().getFunctionType( FnType->getReturnType(), FnType->getParamTypes(), FnType->getExtProtoInfo().withExceptionSpec(EST_None)); std::string OutName; llvm::raw_string_ostream Out(OutName); getCXXABI().getMangleContext().mangleCanonicalTypeName( T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers); if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers) Out << ".normalized"; return llvm::ConstantInt::get(Int32Ty, static_cast(llvm::xxHash64(OutName))); } void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD, const CGFunctionInfo &Info, llvm::Function *F, bool IsThunk) { unsigned CallingConv; llvm::AttributeList PAL; ConstructAttributeList(F->getName(), Info, GD, PAL, CallingConv, /*AttrOnCallSite=*/false, IsThunk); if (CallingConv == llvm::CallingConv::X86_VectorCall && getTarget().getTriple().isWindowsArm64EC()) { SourceLocation Loc; if (const Decl *D = GD.getDecl()) Loc = D->getLocation(); Error(Loc, "__vectorcall calling convention is not currently supported"); } F->setAttributes(PAL); F->setCallingConv(static_cast(CallingConv)); } static void removeImageAccessQualifier(std::string& TyName) { std::string ReadOnlyQual("__read_only"); std::string::size_type ReadOnlyPos = TyName.find(ReadOnlyQual); if (ReadOnlyPos != std::string::npos) // "+ 1" for the space after access qualifier. TyName.erase(ReadOnlyPos, ReadOnlyQual.size() + 1); else { std::string WriteOnlyQual("__write_only"); std::string::size_type WriteOnlyPos = TyName.find(WriteOnlyQual); if (WriteOnlyPos != std::string::npos) TyName.erase(WriteOnlyPos, WriteOnlyQual.size() + 1); else { std::string ReadWriteQual("__read_write"); std::string::size_type ReadWritePos = TyName.find(ReadWriteQual); if (ReadWritePos != std::string::npos) TyName.erase(ReadWritePos, ReadWriteQual.size() + 1); } } } // Returns the address space id that should be produced to the // kernel_arg_addr_space metadata. This is always fixed to the ids // as specified in the SPIR 2.0 specification in order to differentiate // for example in clGetKernelArgInfo() implementation between the address // spaces with targets without unique mapping to the OpenCL address spaces // (basically all single AS CPUs). static unsigned ArgInfoAddressSpace(LangAS AS) { switch (AS) { case LangAS::opencl_global: return 1; case LangAS::opencl_constant: return 2; case LangAS::opencl_local: return 3; case LangAS::opencl_generic: return 4; // Not in SPIR 2.0 specs. case LangAS::opencl_global_device: return 5; case LangAS::opencl_global_host: return 6; default: return 0; // Assume private. 
} } void CodeGenModule::GenKernelArgMetadata(llvm::Function *Fn, const FunctionDecl *FD, CodeGenFunction *CGF) { assert(((FD && CGF) || (!FD && !CGF)) && "Incorrect use - FD and CGF should either be both null or not!"); // Create MDNodes that represent the kernel arg metadata. // Each MDNode is a list in the form of "key", N number of values which is // the same number of values as their are kernel arguments. const PrintingPolicy &Policy = Context.getPrintingPolicy(); // MDNode for the kernel argument address space qualifiers. SmallVector addressQuals; // MDNode for the kernel argument access qualifiers (images only). SmallVector accessQuals; // MDNode for the kernel argument type names. SmallVector argTypeNames; // MDNode for the kernel argument base type names. SmallVector argBaseTypeNames; // MDNode for the kernel argument type qualifiers. SmallVector argTypeQuals; // MDNode for the kernel argument names. SmallVector argNames; if (FD && CGF) for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { const ParmVarDecl *parm = FD->getParamDecl(i); // Get argument name. argNames.push_back(llvm::MDString::get(VMContext, parm->getName())); if (!getLangOpts().OpenCL) continue; QualType ty = parm->getType(); std::string typeQuals; // Get image and pipe access qualifier: if (ty->isImageType() || ty->isPipeType()) { const Decl *PDecl = parm; if (const auto *TD = ty->getAs()) PDecl = TD->getDecl(); const OpenCLAccessAttr *A = PDecl->getAttr(); if (A && A->isWriteOnly()) accessQuals.push_back(llvm::MDString::get(VMContext, "write_only")); else if (A && A->isReadWrite()) accessQuals.push_back(llvm::MDString::get(VMContext, "read_write")); else accessQuals.push_back(llvm::MDString::get(VMContext, "read_only")); } else accessQuals.push_back(llvm::MDString::get(VMContext, "none")); auto getTypeSpelling = [&](QualType Ty) { auto typeName = Ty.getUnqualifiedType().getAsString(Policy); if (Ty.isCanonical()) { StringRef typeNameRef = typeName; // Turn "unsigned type" to "utype" if (typeNameRef.consume_front("unsigned ")) return std::string("u") + typeNameRef.str(); if (typeNameRef.consume_front("signed ")) return typeNameRef.str(); } return typeName; }; if (ty->isPointerType()) { QualType pointeeTy = ty->getPointeeType(); // Get address qualifier. addressQuals.push_back( llvm::ConstantAsMetadata::get(CGF->Builder.getInt32( ArgInfoAddressSpace(pointeeTy.getAddressSpace())))); // Get argument type name. std::string typeName = getTypeSpelling(pointeeTy) + "*"; std::string baseTypeName = getTypeSpelling(pointeeTy.getCanonicalType()) + "*"; argTypeNames.push_back(llvm::MDString::get(VMContext, typeName)); argBaseTypeNames.push_back( llvm::MDString::get(VMContext, baseTypeName)); // Get argument type qualifiers: if (ty.isRestrictQualified()) typeQuals = "restrict"; if (pointeeTy.isConstQualified() || (pointeeTy.getAddressSpace() == LangAS::opencl_constant)) typeQuals += typeQuals.empty() ? "const" : " const"; if (pointeeTy.isVolatileQualified()) typeQuals += typeQuals.empty() ? "volatile" : " volatile"; } else { uint32_t AddrSpc = 0; bool isPipe = ty->isPipeType(); if (ty->isImageType() || isPipe) AddrSpc = ArgInfoAddressSpace(LangAS::opencl_global); addressQuals.push_back( llvm::ConstantAsMetadata::get(CGF->Builder.getInt32(AddrSpc))); // Get argument type name. ty = isPipe ? 
ty->castAs()->getElementType() : ty; std::string typeName = getTypeSpelling(ty); std::string baseTypeName = getTypeSpelling(ty.getCanonicalType()); // Remove access qualifiers on images // (as they are inseparable from type in clang implementation, // but OpenCL spec provides a special query to get access qualifier // via clGetKernelArgInfo with CL_KERNEL_ARG_ACCESS_QUALIFIER): if (ty->isImageType()) { removeImageAccessQualifier(typeName); removeImageAccessQualifier(baseTypeName); } argTypeNames.push_back(llvm::MDString::get(VMContext, typeName)); argBaseTypeNames.push_back( llvm::MDString::get(VMContext, baseTypeName)); if (isPipe) typeQuals = "pipe"; } argTypeQuals.push_back(llvm::MDString::get(VMContext, typeQuals)); } if (getLangOpts().OpenCL) { Fn->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(VMContext, addressQuals)); Fn->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(VMContext, accessQuals)); Fn->setMetadata("kernel_arg_type", llvm::MDNode::get(VMContext, argTypeNames)); Fn->setMetadata("kernel_arg_base_type", llvm::MDNode::get(VMContext, argBaseTypeNames)); Fn->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(VMContext, argTypeQuals)); } if (getCodeGenOpts().EmitOpenCLArgMetadata || getCodeGenOpts().HIPSaveKernelArgName) Fn->setMetadata("kernel_arg_name", llvm::MDNode::get(VMContext, argNames)); } /// Determines whether the language options require us to model /// unwind exceptions. We treat -fexceptions as mandating this /// except under the fragile ObjC ABI with only ObjC exceptions /// enabled. This means, for example, that C with -fexceptions /// enables this. static bool hasUnwindExceptions(const LangOptions &LangOpts) { // If exceptions are completely disabled, obviously this is false. if (!LangOpts.Exceptions) return false; // If C++ exceptions are enabled, this is true. if (LangOpts.CXXExceptions) return true; // If ObjC exceptions are enabled, this depends on the ABI. if (LangOpts.ObjCExceptions) { return LangOpts.ObjCRuntime.hasUnwindExceptions(); } return true; } static bool requiresMemberFunctionPointerTypeMetadata(CodeGenModule &CGM, const CXXMethodDecl *MD) { // Check that the type metadata can ever actually be used by a call. if (!CGM.getCodeGenOpts().LTOUnit || !CGM.HasHiddenLTOVisibility(MD->getParent())) return false; // Only functions whose address can be taken with a member function pointer // need this sort of type metadata. return MD->isImplicitObjectMemberFunction() && !MD->isVirtual() && !isa(MD); } SmallVector CodeGenModule::getMostBaseClasses(const CXXRecordDecl *RD) { llvm::SetVector MostBases; std::function CollectMostBases; CollectMostBases = [&](const CXXRecordDecl *RD) { if (RD->getNumBases() == 0) MostBases.insert(RD); for (const CXXBaseSpecifier &B : RD->bases()) CollectMostBases(B.getType()->getAsCXXRecordDecl()); }; CollectMostBases(RD); return MostBases.takeVector(); } void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, llvm::Function *F) { llvm::AttrBuilder B(F->getContext()); if ((!D || !D->hasAttr()) && CodeGenOpts.UnwindTables) B.addUWTableAttr(llvm::UWTableKind(CodeGenOpts.UnwindTables)); if (CodeGenOpts.StackClashProtector) B.addAttribute("probe-stack", "inline-asm"); if (CodeGenOpts.StackProbeSize && CodeGenOpts.StackProbeSize != 4096) B.addAttribute("stack-probe-size", std::to_string(CodeGenOpts.StackProbeSize)); if (!hasUnwindExceptions(LangOpts)) B.addAttribute(llvm::Attribute::NoUnwind); if (D && D->hasAttr()) ; // Do nothing. 
else if (D && D->hasAttr() && isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPOn)) B.addAttribute(llvm::Attribute::StackProtectStrong); else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPOn)) B.addAttribute(llvm::Attribute::StackProtect); else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPStrong)) B.addAttribute(llvm::Attribute::StackProtectStrong); else if (isStackProtectorOn(LangOpts, getTriple(), LangOptions::SSPReq)) B.addAttribute(llvm::Attribute::StackProtectReq); if (!D) { // If we don't have a declaration to control inlining, the function isn't // explicitly marked as alwaysinline for semantic reasons, and inlining is // disabled, mark the function as noinline. if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) && CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) B.addAttribute(llvm::Attribute::NoInline); F->addFnAttrs(B); return; } // Handle SME attributes that apply to function definitions, // rather than to function prototypes. if (D->hasAttr()) B.addAttribute("aarch64_pstate_sm_body"); if (auto *Attr = D->getAttr()) { if (Attr->isNewZA()) B.addAttribute("aarch64_new_za"); if (Attr->isNewZT0()) B.addAttribute("aarch64_new_zt0"); } // Track whether we need to add the optnone LLVM attribute, // starting with the default for this optimization level. bool ShouldAddOptNone = !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0; // We can't add optnone in the following cases, it won't pass the verifier. ShouldAddOptNone &= !D->hasAttr(); ShouldAddOptNone &= !D->hasAttr(); // Add optnone, but do so only if the function isn't always_inline. if ((ShouldAddOptNone || D->hasAttr()) && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) { B.addAttribute(llvm::Attribute::OptimizeNone); // OptimizeNone implies noinline; we should not be inlining such functions. B.addAttribute(llvm::Attribute::NoInline); // We still need to handle naked functions even though optnone subsumes // much of their semantics. if (D->hasAttr()) B.addAttribute(llvm::Attribute::Naked); // OptimizeNone wins over OptimizeForSize and MinSize. F->removeFnAttr(llvm::Attribute::OptimizeForSize); F->removeFnAttr(llvm::Attribute::MinSize); } else if (D->hasAttr()) { // Naked implies noinline: we should not be inlining such functions. B.addAttribute(llvm::Attribute::Naked); B.addAttribute(llvm::Attribute::NoInline); } else if (D->hasAttr()) { B.addAttribute(llvm::Attribute::NoDuplicate); } else if (D->hasAttr() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) { // Add noinline if the function isn't always_inline. B.addAttribute(llvm::Attribute::NoInline); } else if (D->hasAttr() && !F->hasFnAttribute(llvm::Attribute::NoInline)) { // (noinline wins over always_inline, and we can't specify both in IR) B.addAttribute(llvm::Attribute::AlwaysInline); } else if (CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining) { // If we're not inlining, then force everything that isn't always_inline to // carry an explicit noinline attribute. if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline)) B.addAttribute(llvm::Attribute::NoInline); } else { // Otherwise, propagate the inline hint attribute and potentially use its // absence to mark things as noinline. if (auto *FD = dyn_cast(D)) { // Search function and template pattern redeclarations for inline. 
auto CheckForInline = [](const FunctionDecl *FD) { auto CheckRedeclForInline = [](const FunctionDecl *Redecl) { return Redecl->isInlineSpecified(); }; if (any_of(FD->redecls(), CheckRedeclForInline)) return true; const FunctionDecl *Pattern = FD->getTemplateInstantiationPattern(); if (!Pattern) return false; return any_of(Pattern->redecls(), CheckRedeclForInline); }; if (CheckForInline(FD)) { B.addAttribute(llvm::Attribute::InlineHint); } else if (CodeGenOpts.getInlining() == CodeGenOptions::OnlyHintInlining && !FD->isInlined() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) { B.addAttribute(llvm::Attribute::NoInline); } } } // Add other optimization related attributes if we are optimizing this // function. if (!D->hasAttr()) { if (D->hasAttr()) { if (!ShouldAddOptNone) B.addAttribute(llvm::Attribute::OptimizeForSize); B.addAttribute(llvm::Attribute::Cold); } if (D->hasAttr()) B.addAttribute(llvm::Attribute::Hot); if (D->hasAttr()) B.addAttribute(llvm::Attribute::MinSize); } F->addFnAttrs(B); unsigned alignment = D->getMaxAlignment() / Context.getCharWidth(); if (alignment) F->setAlignment(llvm::Align(alignment)); if (!D->hasAttr()) if (LangOpts.FunctionAlignment) F->setAlignment(llvm::Align(1ull << LangOpts.FunctionAlignment)); // Some C++ ABIs require 2-byte alignment for member functions, in order to // reserve a bit for differentiating between virtual and non-virtual member // functions. If the current target's C++ ABI requires this and this is a // member function, set its alignment accordingly. if (getTarget().getCXXABI().areMemberFunctionsAligned()) { if (isa(D) && F->getPointerAlignment(getDataLayout()) < 2) F->setAlignment(std::max(llvm::Align(2), F->getAlign().valueOrOne())); } // In the cross-dso CFI mode with canonical jump tables, we want !type // attributes on definitions only. if (CodeGenOpts.SanitizeCfiCrossDso && CodeGenOpts.SanitizeCfiCanonicalJumpTables) { if (auto *FD = dyn_cast(D)) { // Skip available_externally functions. They won't be codegen'ed in the // current module anyway. if (getContext().GetGVALinkageForFunction(FD) != GVA_AvailableExternally) CreateFunctionTypeMetadataForIcall(FD, F); } } // Emit type metadata on member functions for member function pointer checks. // These are only ever necessary on definitions; we're guaranteed that the // definition will be present in the LTO unit as a result of LTO visibility. auto *MD = dyn_cast(D); if (MD && requiresMemberFunctionPointerTypeMetadata(*this, MD)) { for (const CXXRecordDecl *Base : getMostBaseClasses(MD->getParent())) { llvm::Metadata *Id = CreateMetadataIdentifierForType(Context.getMemberPointerType( MD->getType(), Context.getRecordType(Base).getTypePtr())); F->addTypeMetadata(0, Id); } } } void CodeGenModule::SetCommonAttributes(GlobalDecl GD, llvm::GlobalValue *GV) { const Decl *D = GD.getDecl(); if (isa_and_nonnull(D)) setGVProperties(GV, GD); else GV->setVisibility(llvm::GlobalValue::DefaultVisibility); if (D && D->hasAttr()) addUsedOrCompilerUsedGlobal(GV); if (const auto *VD = dyn_cast_if_present(D); VD && ((CodeGenOpts.KeepPersistentStorageVariables && (VD->getStorageDuration() == SD_Static || VD->getStorageDuration() == SD_Thread)) || (CodeGenOpts.KeepStaticConsts && VD->getStorageDuration() == SD_Static && VD->getType().isConstQualified()))) addUsedOrCompilerUsedGlobal(GV); } bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, llvm::AttrBuilder &Attrs, bool SetTargetFeatures) { // Add target-cpu and target-features attributes to functions. 
If // we have a decl for the function and it has a target attribute then // parse that and add it to the feature set. StringRef TargetCPU = getTarget().getTargetOpts().CPU; StringRef TuneCPU = getTarget().getTargetOpts().TuneCPU; std::vector Features; const auto *FD = dyn_cast_or_null(GD.getDecl()); FD = FD ? FD->getMostRecentDecl() : FD; const auto *TD = FD ? FD->getAttr() : nullptr; const auto *TV = FD ? FD->getAttr() : nullptr; assert((!TD || !TV) && "both target_version and target specified"); const auto *SD = FD ? FD->getAttr() : nullptr; const auto *TC = FD ? FD->getAttr() : nullptr; bool AddedAttr = false; if (TD || TV || SD || TC) { llvm::StringMap FeatureMap; getContext().getFunctionFeatureMap(FeatureMap, GD); // Produce the canonical string for this set of features. for (const llvm::StringMap::value_type &Entry : FeatureMap) Features.push_back((Entry.getValue() ? "+" : "-") + Entry.getKey().str()); // Now add the target-cpu and target-features to the function. // While we populated the feature map above, we still need to // get and parse the target attribute so we can get the cpu for // the function. if (TD) { ParsedTargetAttr ParsedAttr = Target.parseTargetAttr(TD->getFeaturesStr()); if (!ParsedAttr.CPU.empty() && getTarget().isValidCPUName(ParsedAttr.CPU)) { TargetCPU = ParsedAttr.CPU; TuneCPU = ""; // Clear the tune CPU. } if (!ParsedAttr.Tune.empty() && getTarget().isValidCPUName(ParsedAttr.Tune)) TuneCPU = ParsedAttr.Tune; } if (SD) { // Apply the given CPU name as the 'tune-cpu' so that the optimizer can // favor this processor. TuneCPU = SD->getCPUName(GD.getMultiVersionIndex())->getName(); } } else { // Otherwise just add the existing target cpu and target features to the // function. Features = getTarget().getTargetOpts().Features; } if (!TargetCPU.empty()) { Attrs.addAttribute("target-cpu", TargetCPU); AddedAttr = true; } if (!TuneCPU.empty()) { Attrs.addAttribute("tune-cpu", TuneCPU); AddedAttr = true; } if (!Features.empty() && SetTargetFeatures) { llvm::erase_if(Features, [&](const std::string& F) { return getTarget().isReadOnlyFeature(F.substr(1)); }); llvm::sort(Features); Attrs.addAttribute("target-features", llvm::join(Features, ",")); AddedAttr = true; } return AddedAttr; } void CodeGenModule::setNonAliasAttributes(GlobalDecl GD, llvm::GlobalObject *GO) { const Decl *D = GD.getDecl(); SetCommonAttributes(GD, GO); if (D) { if (auto *GV = dyn_cast(GO)) { if (D->hasAttr()) addUsedGlobal(GV); if (auto *SA = D->getAttr()) GV->addAttribute("bss-section", SA->getName()); if (auto *SA = D->getAttr()) GV->addAttribute("data-section", SA->getName()); if (auto *SA = D->getAttr()) GV->addAttribute("rodata-section", SA->getName()); if (auto *SA = D->getAttr()) GV->addAttribute("relro-section", SA->getName()); } if (auto *F = dyn_cast(GO)) { if (D->hasAttr()) addUsedGlobal(F); if (auto *SA = D->getAttr()) if (!D->getAttr()) F->setSection(SA->getName()); llvm::AttrBuilder Attrs(F->getContext()); if (GetCPUAndFeaturesAttributes(GD, Attrs)) { // We know that GetCPUAndFeaturesAttributes will always have the // newest set, since it has the newest possible FunctionDecl, so the // new ones should replace the old. 
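  // Illustrative sketch (assumed CPU and feature names): the attributes
  // collected by GetCPUAndFeaturesAttributes above and re-applied below
  // typically show up in the emitted IR as string attributes such as
  //   attributes #0 = { "target-cpu"="x86-64" "tune-cpu"="generic"
  //                     "target-features"="+avx,+avx2,+sse4.2" }
  // Only the attribute keys ("target-cpu", "tune-cpu", "target-features") and
  // the "+feature"/"-feature" comma-joined encoding come from the code here;
  // the concrete values are made up for the example.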
llvm::AttributeMask RemoveAttrs; RemoveAttrs.addAttribute("target-cpu"); RemoveAttrs.addAttribute("target-features"); RemoveAttrs.addAttribute("tune-cpu"); F->removeFnAttrs(RemoveAttrs); F->addFnAttrs(Attrs); } } if (const auto *CSA = D->getAttr()) GO->setSection(CSA->getName()); else if (const auto *SA = D->getAttr()) GO->setSection(SA->getName()); } getTargetCodeGenInfo().setTargetAttributes(D, GO, *this); } void CodeGenModule::SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI) { const Decl *D = GD.getDecl(); SetLLVMFunctionAttributes(GD, FI, F, /*IsThunk=*/false); SetLLVMFunctionAttributesForDefinition(D, F); F->setLinkage(llvm::Function::InternalLinkage); setNonAliasAttributes(GD, F); } static void setLinkageForGV(llvm::GlobalValue *GV, const NamedDecl *ND) { // Set linkage and visibility in case we never see a definition. LinkageInfo LV = ND->getLinkageAndVisibility(); // Don't set internal linkage on declarations. // "extern_weak" is overloaded in LLVM; we probably should have // separate linkage types for this. if (isExternallyVisible(LV.getLinkage()) && (ND->hasAttr() || ND->isWeakImported())) GV->setLinkage(llvm::GlobalValue::ExternalWeakLinkage); } void CodeGenModule::CreateFunctionTypeMetadataForIcall(const FunctionDecl *FD, llvm::Function *F) { // Only if we are checking indirect calls. if (!LangOpts.Sanitize.has(SanitizerKind::CFIICall)) return; // Non-static class methods are handled via vtable or member function pointer // checks elsewhere. if (isa(FD) && !cast(FD)->isStatic()) return; llvm::Metadata *MD = CreateMetadataIdentifierForType(FD->getType()); F->addTypeMetadata(0, MD); F->addTypeMetadata(0, CreateMetadataIdentifierGeneralized(FD->getType())); // Emit a hash-based bit set entry for cross-DSO calls. if (CodeGenOpts.SanitizeCfiCrossDso) if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD)) F->addTypeMetadata(0, llvm::ConstantAsMetadata::get(CrossDsoTypeId)); } void CodeGenModule::setKCFIType(const FunctionDecl *FD, llvm::Function *F) { llvm::LLVMContext &Ctx = F->getContext(); llvm::MDBuilder MDB(Ctx); F->setMetadata(llvm::LLVMContext::MD_kcfi_type, llvm::MDNode::get( Ctx, MDB.createConstant(CreateKCFITypeId(FD->getType())))); } static bool allowKCFIIdentifier(StringRef Name) { // KCFI type identifier constants are only necessary for external assembly // functions, which means it's safe to skip unusual names. Subset of // MCAsmInfo::isAcceptableChar() and MCAsmInfoXCOFF::isAcceptableChar(). return llvm::all_of(Name, [](const char &C) { return llvm::isAlnum(C) || C == '_' || C == '.'; }); } void CodeGenModule::finalizeKCFITypes() { llvm::Module &M = getModule(); for (auto &F : M.functions()) { // Remove KCFI type metadata from non-address-taken local functions. bool AddressTaken = F.hasAddressTaken(); if (!AddressTaken && F.hasLocalLinkage()) F.eraseMetadata(llvm::LLVMContext::MD_kcfi_type); // Generate a constant with the expected KCFI type identifier for all // address-taken function declarations to support annotating indirectly // called assembly functions. 
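  // Illustrative sketch (assumed symbol name and hash value): for an
  // address-taken external declaration `foo`, the code below appends
  // module-level inline assembly along the lines of
  //   .weak __kcfi_typeid_foo
  //   .set  __kcfi_typeid_foo, 1234567890
  // so that an assembly implementation of `foo` can carry the same KCFI type
  // id that instrumented indirect call sites check against.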
if (!AddressTaken || !F.isDeclaration()) continue; const llvm::ConstantInt *Type; if (const llvm::MDNode *MD = F.getMetadata(llvm::LLVMContext::MD_kcfi_type)) Type = llvm::mdconst::extract(MD->getOperand(0)); else continue; StringRef Name = F.getName(); if (!allowKCFIIdentifier(Name)) continue; std::string Asm = (".weak __kcfi_typeid_" + Name + "\n.set __kcfi_typeid_" + Name + ", " + Twine(Type->getZExtValue()) + "\n") .str(); M.appendModuleInlineAsm(Asm); } } void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, bool IsIncompleteFunction, bool IsThunk) { if (llvm::Intrinsic::ID IID = F->getIntrinsicID()) { // If this is an intrinsic function, set the function's attributes // to the intrinsic's attributes. F->setAttributes(llvm::Intrinsic::getAttributes(getLLVMContext(), IID)); return; } const auto *FD = cast(GD.getDecl()); if (!IsIncompleteFunction) SetLLVMFunctionAttributes(GD, getTypes().arrangeGlobalDeclaration(GD), F, IsThunk); // Add the Returned attribute for "this", except for iOS 5 and earlier // where substantial code, including the libstdc++ dylib, was compiled with // GCC and does not actually return "this". if (!IsThunk && getCXXABI().HasThisReturn(GD) && !(getTriple().isiOS() && getTriple().isOSVersionLT(6))) { assert(!F->arg_empty() && F->arg_begin()->getType() ->canLosslesslyBitCastTo(F->getReturnType()) && "unexpected this return"); F->addParamAttr(0, llvm::Attribute::Returned); } // Only a few attributes are set on declarations; these may later be // overridden by a definition. setLinkageForGV(F, FD); setGVProperties(F, FD); // Setup target-specific attributes. if (!IsIncompleteFunction && F->isDeclaration()) getTargetCodeGenInfo().setTargetAttributes(FD, F, *this); if (const auto *CSA = FD->getAttr()) F->setSection(CSA->getName()); else if (const auto *SA = FD->getAttr()) F->setSection(SA->getName()); if (const auto *EA = FD->getAttr()) { if (EA->isError()) F->addFnAttr("dontcall-error", EA->getUserDiagnostic()); else if (EA->isWarning()) F->addFnAttr("dontcall-warn", EA->getUserDiagnostic()); } // If we plan on emitting this inline builtin, we can't treat it as a builtin. if (FD->isInlineBuiltinDeclaration()) { const FunctionDecl *FDBody; bool HasBody = FD->hasBody(FDBody); (void)HasBody; assert(HasBody && "Inline builtin declarations should always have an " "available body!"); if (shouldEmitFunction(FDBody)) F->addFnAttr(llvm::Attribute::NoBuiltin); } if (FD->isReplaceableGlobalAllocationFunction()) { // A replaceable global allocation function does not act like a builtin by // default, only if it is invoked by a new-expression or delete-expression. F->addFnAttr(llvm::Attribute::NoBuiltin); } if (isa(FD) || isa(FD)) F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); else if (const auto *MD = dyn_cast(FD)) if (MD->isVirtual()) F->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't emit entries for function declarations in the cross-DSO mode. This // is handled with better precision by the receiving DSO. But if jump tables // are non-canonical then we need type metadata in order to produce the local // jump table. 
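  // Illustrative sketch (assumed mangled type names): when
  // CreateFunctionTypeMetadataForIcall runs for a function, it ends up with
  // !type metadata roughly of the shape
  //   define void @f() !type !0 !type !1 { ... }
  //   !0 = !{i64 0, !"_ZTSFvvE"}
  //   !1 = !{i64 0, !"_ZTSFvvE.generalized"}
  // which the CFI indirect-call machinery uses to build its jump tables and
  // bit sets; the exact identifiers here are assumptions for the example.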
if (!CodeGenOpts.SanitizeCfiCrossDso || !CodeGenOpts.SanitizeCfiCanonicalJumpTables) CreateFunctionTypeMetadataForIcall(FD, F); if (LangOpts.Sanitize.has(SanitizerKind::KCFI)) setKCFIType(FD, F); if (getLangOpts().OpenMP && FD->hasAttr()) getOpenMPRuntime().emitDeclareSimdFunction(FD, F); if (CodeGenOpts.InlineMaxStackSize != UINT_MAX) F->addFnAttr("inline-max-stacksize", llvm::utostr(CodeGenOpts.InlineMaxStackSize)); if (const auto *CB = FD->getAttr()) { // Annotate the callback behavior as metadata: // - The callback callee (as argument number). // - The callback payloads (as argument numbers). llvm::LLVMContext &Ctx = F->getContext(); llvm::MDBuilder MDB(Ctx); // The payload indices are all but the first one in the encoding. The first // identifies the callback callee. int CalleeIdx = *CB->encoding_begin(); ArrayRef PayloadIndices(CB->encoding_begin() + 1, CB->encoding_end()); F->addMetadata(llvm::LLVMContext::MD_callback, *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding( CalleeIdx, PayloadIndices, /* VarArgsArePassed */ false)})); } } void CodeGenModule::addUsedGlobal(llvm::GlobalValue *GV) { assert((isa(GV) || !GV->isDeclaration()) && "Only globals with definition can force usage."); LLVMUsed.emplace_back(GV); } void CodeGenModule::addCompilerUsedGlobal(llvm::GlobalValue *GV) { assert(!GV->isDeclaration() && "Only globals with definition can force usage."); LLVMCompilerUsed.emplace_back(GV); } void CodeGenModule::addUsedOrCompilerUsedGlobal(llvm::GlobalValue *GV) { assert((isa(GV) || !GV->isDeclaration()) && "Only globals with definition can force usage."); if (getTriple().isOSBinFormatELF()) LLVMCompilerUsed.emplace_back(GV); else LLVMUsed.emplace_back(GV); } static void emitUsed(CodeGenModule &CGM, StringRef Name, std::vector &List) { // Don't create llvm.used if there is no need. if (List.empty()) return; // Convert List to what ConstantArray needs. 
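  // Illustrative sketch (assumed globals): for List = {@foo, @bar} the code
  // below produces something like
  //   @llvm.used = appending global [2 x ptr] [ptr @foo, ptr @bar],
  //                section "llvm.metadata"
  // (or @llvm.compiler.used, depending on the Name passed in). Entries in
  // llvm.compiler.used survive compiler optimizations; entries in llvm.used
  // are additionally preserved for the linker.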
SmallVector UsedArray; UsedArray.resize(List.size()); for (unsigned i = 0, e = List.size(); i != e; ++i) { UsedArray[i] = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( cast(&*List[i]), CGM.Int8PtrTy); } if (UsedArray.empty()) return; llvm::ArrayType *ATy = llvm::ArrayType::get(CGM.Int8PtrTy, UsedArray.size()); auto *GV = new llvm::GlobalVariable( CGM.getModule(), ATy, false, llvm::GlobalValue::AppendingLinkage, llvm::ConstantArray::get(ATy, UsedArray), Name); GV->setSection("llvm.metadata"); } void CodeGenModule::emitLLVMUsed() { emitUsed(*this, "llvm.used", LLVMUsed); emitUsed(*this, "llvm.compiler.used", LLVMCompilerUsed); } void CodeGenModule::AppendLinkerOptions(StringRef Opts) { auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opts); LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts)); } void CodeGenModule::AddDetectMismatch(StringRef Name, StringRef Value) { llvm::SmallString<32> Opt; getTargetCodeGenInfo().getDetectMismatchOption(Name, Value, Opt); if (Opt.empty()) return; auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opt); LinkerOptionsMetadata.push_back(llvm::MDNode::get(getLLVMContext(), MDOpts)); } void CodeGenModule::AddDependentLib(StringRef Lib) { auto &C = getLLVMContext(); if (getTarget().getTriple().isOSBinFormatELF()) { ELFDependentLibraries.push_back( llvm::MDNode::get(C, llvm::MDString::get(C, Lib))); return; } llvm::SmallString<24> Opt; getTargetCodeGenInfo().getDependentLibraryOption(Lib, Opt); auto *MDOpts = llvm::MDString::get(getLLVMContext(), Opt); LinkerOptionsMetadata.push_back(llvm::MDNode::get(C, MDOpts)); } /// Add link options implied by the given module, including modules /// it depends on, using a postorder walk. static void addLinkOptionsPostorder(CodeGenModule &CGM, Module *Mod, SmallVectorImpl &Metadata, llvm::SmallPtrSet &Visited) { // Import this module's parent. if (Mod->Parent && Visited.insert(Mod->Parent).second) { addLinkOptionsPostorder(CGM, Mod->Parent, Metadata, Visited); } // Import this module's dependencies. for (Module *Import : llvm::reverse(Mod->Imports)) { if (Visited.insert(Import).second) addLinkOptionsPostorder(CGM, Import, Metadata, Visited); } // Add linker options to link against the libraries/frameworks // described by this module. llvm::LLVMContext &Context = CGM.getLLVMContext(); bool IsELF = CGM.getTarget().getTriple().isOSBinFormatELF(); // For modules that use export_as for linking, use that module // name instead. if (Mod->UseExportAsModuleLinkName) return; for (const Module::LinkLibrary &LL : llvm::reverse(Mod->LinkLibraries)) { // Link against a framework. Frameworks are currently Darwin only, so we // don't to ask TargetCodeGenInfo for the spelling of the linker option. if (LL.IsFramework) { llvm::Metadata *Args[2] = {llvm::MDString::get(Context, "-framework"), llvm::MDString::get(Context, LL.Library)}; Metadata.push_back(llvm::MDNode::get(Context, Args)); continue; } // Link against a library. 
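  // Illustrative sketch (assumed library names): once EmitModuleLinkOptions
  // copies these nodes into the "llvm.linker.options" named metadata, the IR
  // carries entries roughly like
  //   !llvm.linker.options = !{!0, !1}
  //   !0 = !{!"-framework", !"Cocoa"}   ; Darwin framework case above
  //   !1 = !{!"lib", !"z"}              ; ELF case below
  // with the non-ELF, non-framework spelling delegated to
  // getDependentLibraryOption().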
if (IsELF) { llvm::Metadata *Args[2] = { llvm::MDString::get(Context, "lib"), llvm::MDString::get(Context, LL.Library), }; Metadata.push_back(llvm::MDNode::get(Context, Args)); } else { llvm::SmallString<24> Opt; CGM.getTargetCodeGenInfo().getDependentLibraryOption(LL.Library, Opt); auto *OptString = llvm::MDString::get(Context, Opt); Metadata.push_back(llvm::MDNode::get(Context, OptString)); } } } void CodeGenModule::EmitModuleInitializers(clang::Module *Primary) { assert(Primary->isNamedModuleUnit() && "We should only emit module initializers for named modules."); // Emit the initializers in the order that sub-modules appear in the // source, first Global Module Fragments, if present. if (auto GMF = Primary->getGlobalModuleFragment()) { for (Decl *D : getContext().getModuleInitializers(GMF)) { if (isa(D)) continue; assert(isa(D) && "GMF initializer decl is not a var?"); EmitTopLevelDecl(D); } } // Second any associated with the module, itself. for (Decl *D : getContext().getModuleInitializers(Primary)) { // Skip import decls, the inits for those are called explicitly. if (isa(D)) continue; EmitTopLevelDecl(D); } // Third any associated with the Privat eMOdule Fragment, if present. if (auto PMF = Primary->getPrivateModuleFragment()) { for (Decl *D : getContext().getModuleInitializers(PMF)) { // Skip import decls, the inits for those are called explicitly. if (isa(D)) continue; assert(isa(D) && "PMF initializer decl is not a var?"); EmitTopLevelDecl(D); } } } void CodeGenModule::EmitModuleLinkOptions() { // Collect the set of all of the modules we want to visit to emit link // options, which is essentially the imported modules and all of their // non-explicit child modules. llvm::SetVector LinkModules; llvm::SmallPtrSet Visited; SmallVector Stack; // Seed the stack with imported modules. for (Module *M : ImportedModules) { // Do not add any link flags when an implementation TU of a module imports // a header of that same module. if (M->getTopLevelModuleName() == getLangOpts().CurrentModule && !getLangOpts().isCompilingModule()) continue; if (Visited.insert(M).second) Stack.push_back(M); } // Find all of the modules to import, making a little effort to prune // non-leaf modules. while (!Stack.empty()) { clang::Module *Mod = Stack.pop_back_val(); bool AnyChildren = false; // Visit the submodules of this module. for (const auto &SM : Mod->submodules()) { // Skip explicit children; they need to be explicitly imported to be // linked against. if (SM->IsExplicit) continue; if (Visited.insert(SM).second) { Stack.push_back(SM); AnyChildren = true; } } // We didn't find any children, so add this module to the list of // modules to link against. if (!AnyChildren) { LinkModules.insert(Mod); } } // Add link options for all of the imported modules in reverse topological // order. We don't do anything to try to order import link flags with respect // to linker options inserted by things like #pragma comment(). SmallVector MetadataArgs; Visited.clear(); for (Module *M : LinkModules) if (Visited.insert(M).second) addLinkOptionsPostorder(*this, M, MetadataArgs, Visited); std::reverse(MetadataArgs.begin(), MetadataArgs.end()); LinkerOptionsMetadata.append(MetadataArgs.begin(), MetadataArgs.end()); // Add the linker options metadata flag. auto *NMD = getModule().getOrInsertNamedMetadata("llvm.linker.options"); for (auto *MD : LinkerOptionsMetadata) NMD->addOperand(MD); } void CodeGenModule::EmitDeferred() { // Emit deferred declare target declarations. 
if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd) getOpenMPRuntime().emitDeferredTargetDecls(); // Emit code for any potentially referenced deferred decls. Since a // previously unused static decl may become used during the generation of code // for a static function, iterate until no changes are made. if (!DeferredVTables.empty()) { EmitDeferredVTables(); // Emitting a vtable doesn't directly cause more vtables to // become deferred, although it can cause functions to be // emitted that then need those vtables. assert(DeferredVTables.empty()); } // Emit CUDA/HIP static device variables referenced by host code only. // Note we should not clear CUDADeviceVarODRUsedByHost since it is still // needed for further handling. if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) llvm::append_range(DeferredDeclsToEmit, getContext().CUDADeviceVarODRUsedByHost); // Stop if we're out of both deferred vtables and deferred declarations. if (DeferredDeclsToEmit.empty()) return; // Grab the list of decls to emit. If EmitGlobalDefinition schedules more // work, it will not interfere with this. std::vector CurDeclsToEmit; CurDeclsToEmit.swap(DeferredDeclsToEmit); for (GlobalDecl &D : CurDeclsToEmit) { // We should call GetAddrOfGlobal with IsForDefinition set to true in order // to get GlobalValue with exactly the type we need, not something that // might had been created for another decl with the same mangled name but // different type. llvm::GlobalValue *GV = dyn_cast( GetAddrOfGlobal(D, ForDefinition)); // In case of different address spaces, we may still get a cast, even with // IsForDefinition equal to true. Query mangled names table to get // GlobalValue. if (!GV) GV = GetGlobalValue(getMangledName(D)); // Make sure GetGlobalValue returned non-null. assert(GV); // Check to see if we've already emitted this. This is necessary // for a couple of reasons: first, decls can end up in the // deferred-decls queue multiple times, and second, decls can end // up with definitions in unusual ways (e.g. by an extern inline // function acquiring a strong function redefinition). Just // ignore these cases. if (!GV->isDeclaration()) continue; // If this is OpenMP, check if it is legal to emit this global normally. if (LangOpts.OpenMP && OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(D)) continue; // Otherwise, emit the definition and move on to the next one. EmitGlobalDefinition(D, GV); // If we found out that we need to emit more decls, do that recursively. // This has the advantage that the decls are emitted in a DFS and related // ones are close together, which is convenient for testing. if (!DeferredVTables.empty() || !DeferredDeclsToEmit.empty()) { EmitDeferred(); assert(DeferredVTables.empty() && DeferredDeclsToEmit.empty()); } } } void CodeGenModule::EmitVTablesOpportunistically() { // Try to emit external vtables as available_externally if they have emitted // all inlined virtual functions. It runs after EmitDeferred() and therefore // is not allowed to create new references to things that need to be emitted // lazily. Note that it also uses fact that we eagerly emitting RTTI. 
assert((OpportunisticVTables.empty() || shouldOpportunisticallyEmitVTables()) && "Only emit opportunistic vtables with optimizations"); for (const CXXRecordDecl *RD : OpportunisticVTables) { assert(getVTables().isVTableExternal(RD) && "This queue should only contain external vtables"); if (getCXXABI().canSpeculativelyEmitVTable(RD)) VTables.GenerateClassData(RD); } OpportunisticVTables.clear(); } void CodeGenModule::EmitGlobalAnnotations() { for (const auto& [MangledName, VD] : DeferredAnnotations) { llvm::GlobalValue *GV = GetGlobalValue(MangledName); if (GV) AddGlobalAnnotations(VD, GV); } DeferredAnnotations.clear(); if (Annotations.empty()) return; // Create a new global variable for the ConstantStruct in the Module. llvm::Constant *Array = llvm::ConstantArray::get(llvm::ArrayType::get( Annotations[0]->getType(), Annotations.size()), Annotations); auto *gv = new llvm::GlobalVariable(getModule(), Array->getType(), false, llvm::GlobalValue::AppendingLinkage, Array, "llvm.global.annotations"); gv->setSection(AnnotationSection); } llvm::Constant *CodeGenModule::EmitAnnotationString(StringRef Str) { llvm::Constant *&AStr = AnnotationStrings[Str]; if (AStr) return AStr; // Not found yet, create a new global. llvm::Constant *s = llvm::ConstantDataArray::getString(getLLVMContext(), Str); auto *gv = new llvm::GlobalVariable( getModule(), s->getType(), true, llvm::GlobalValue::PrivateLinkage, s, ".str", nullptr, llvm::GlobalValue::NotThreadLocal, ConstGlobalsPtrTy->getAddressSpace()); gv->setSection(AnnotationSection); gv->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); AStr = gv; return gv; } llvm::Constant *CodeGenModule::EmitAnnotationUnit(SourceLocation Loc) { SourceManager &SM = getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(Loc); if (PLoc.isValid()) return EmitAnnotationString(PLoc.getFilename()); return EmitAnnotationString(SM.getBufferName(Loc)); } llvm::Constant *CodeGenModule::EmitAnnotationLineNo(SourceLocation L) { SourceManager &SM = getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(L); unsigned LineNo = PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(L); return llvm::ConstantInt::get(Int32Ty, LineNo); } llvm::Constant *CodeGenModule::EmitAnnotationArgs(const AnnotateAttr *Attr) { ArrayRef Exprs = {Attr->args_begin(), Attr->args_size()}; if (Exprs.empty()) return llvm::ConstantPointerNull::get(ConstGlobalsPtrTy); llvm::FoldingSetNodeID ID; for (Expr *E : Exprs) { ID.Add(cast(E)->getAPValueResult()); } llvm::Constant *&Lookup = AnnotationArgs[ID.ComputeHash()]; if (Lookup) return Lookup; llvm::SmallVector LLVMArgs; LLVMArgs.reserve(Exprs.size()); ConstantEmitter ConstEmiter(*this); llvm::transform(Exprs, std::back_inserter(LLVMArgs), [&](const Expr *E) { const auto *CE = cast(E); return ConstEmiter.emitAbstract(CE->getBeginLoc(), CE->getAPValueResult(), CE->getType()); }); auto *Struct = llvm::ConstantStruct::getAnon(LLVMArgs); auto *GV = new llvm::GlobalVariable(getModule(), Struct->getType(), true, llvm::GlobalValue::PrivateLinkage, Struct, ".args"); GV->setSection(AnnotationSection); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); Lookup = GV; return GV; } llvm::Constant *CodeGenModule::EmitAnnotateAttr(llvm::GlobalValue *GV, const AnnotateAttr *AA, SourceLocation L) { // Get the globals for file name, annotation, and the line number. 
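  // Illustrative sketch (assumed symbol names): each entry built here becomes
  // one element of @llvm.global.annotations, roughly
  //   { ptr @global, ptr @.str /* annotation text */,
  //     ptr @.str.1 /* translation unit name */, i32 42 /* line */,
  //     ptr @.args /* constant arguments, or null */ }
  // matching the five Fields assembled below.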
llvm::Constant *AnnoGV = EmitAnnotationString(AA->getAnnotation()), *UnitGV = EmitAnnotationUnit(L), *LineNoCst = EmitAnnotationLineNo(L), *Args = EmitAnnotationArgs(AA); llvm::Constant *GVInGlobalsAS = GV; if (GV->getAddressSpace() != getDataLayout().getDefaultGlobalsAddressSpace()) { GVInGlobalsAS = llvm::ConstantExpr::getAddrSpaceCast( GV, llvm::PointerType::get( GV->getContext(), getDataLayout().getDefaultGlobalsAddressSpace())); } // Create the ConstantStruct for the global annotation. llvm::Constant *Fields[] = { GVInGlobalsAS, AnnoGV, UnitGV, LineNoCst, Args, }; return llvm::ConstantStruct::getAnon(Fields); } void CodeGenModule::AddGlobalAnnotations(const ValueDecl *D, llvm::GlobalValue *GV) { assert(D->hasAttr() && "no annotate attribute"); // Get the struct elements for these annotations. for (const auto *I : D->specific_attrs()) Annotations.push_back(EmitAnnotateAttr(GV, I, D->getLocation())); } bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, llvm::Function *Fn, SourceLocation Loc) const { const auto &NoSanitizeL = getContext().getNoSanitizeList(); // NoSanitize by function name. if (NoSanitizeL.containsFunction(Kind, Fn->getName())) return true; // NoSanitize by location. Check "mainfile" prefix. auto &SM = Context.getSourceManager(); FileEntryRef MainFile = *SM.getFileEntryRefForID(SM.getMainFileID()); if (NoSanitizeL.containsMainFile(Kind, MainFile.getName())) return true; // Check "src" prefix. if (Loc.isValid()) return NoSanitizeL.containsLocation(Kind, Loc); // If location is unknown, this may be a compiler-generated function. Assume // it's located in the main file. return NoSanitizeL.containsFile(Kind, MainFile.getName()); } bool CodeGenModule::isInNoSanitizeList(SanitizerMask Kind, llvm::GlobalVariable *GV, SourceLocation Loc, QualType Ty, StringRef Category) const { const auto &NoSanitizeL = getContext().getNoSanitizeList(); if (NoSanitizeL.containsGlobal(Kind, GV->getName(), Category)) return true; auto &SM = Context.getSourceManager(); if (NoSanitizeL.containsMainFile( Kind, SM.getFileEntryRefForID(SM.getMainFileID())->getName(), Category)) return true; if (NoSanitizeL.containsLocation(Kind, Loc, Category)) return true; // Check global type. if (!Ty.isNull()) { // Drill down the array types: if global variable of a fixed type is // not sanitized, we also don't instrument arrays of them. while (auto AT = dyn_cast(Ty.getTypePtr())) Ty = AT->getElementType(); Ty = Ty.getCanonicalType().getUnqualifiedType(); // Only record types (classes, structs etc.) are ignored. 
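  // Illustrative sketch (assumed patterns, using the usual
  // -fsanitize-ignorelist special-case-list syntax): the entries consulted
  // above and below typically look like
  //   global:g_legacy_table
  //   type:LegacyPOD
  //   src:third_party/*
  //   mainfile:generated.cpp
  // where a matching "type:" entry suppresses instrumentation for globals of
  // that record type (including arrays of it, per the element-type walk above).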
if (Ty->isRecordType()) { std::string TypeStr = Ty.getAsString(getContext().getPrintingPolicy()); if (NoSanitizeL.containsType(Kind, TypeStr, Category)) return true; } } return false; } bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, StringRef Category) const { const auto &XRayFilter = getContext().getXRayFilter(); using ImbueAttr = XRayFunctionFilter::ImbueAttribute; auto Attr = ImbueAttr::NONE; if (Loc.isValid()) Attr = XRayFilter.shouldImbueLocation(Loc, Category); if (Attr == ImbueAttr::NONE) Attr = XRayFilter.shouldImbueFunction(Fn->getName()); switch (Attr) { case ImbueAttr::NONE: return false; case ImbueAttr::ALWAYS: Fn->addFnAttr("function-instrument", "xray-always"); break; case ImbueAttr::ALWAYS_ARG1: Fn->addFnAttr("function-instrument", "xray-always"); Fn->addFnAttr("xray-log-args", "1"); break; case ImbueAttr::NEVER: Fn->addFnAttr("function-instrument", "xray-never"); break; } return true; } ProfileList::ExclusionType CodeGenModule::isFunctionBlockedByProfileList(llvm::Function *Fn, SourceLocation Loc) const { const auto &ProfileList = getContext().getProfileList(); // If the profile list is empty, then instrument everything. if (ProfileList.isEmpty()) return ProfileList::Allow; CodeGenOptions::ProfileInstrKind Kind = getCodeGenOpts().getProfileInstr(); // First, check the function name. if (auto V = ProfileList.isFunctionExcluded(Fn->getName(), Kind)) return *V; // Next, check the source location. if (Loc.isValid()) if (auto V = ProfileList.isLocationExcluded(Loc, Kind)) return *V; // If location is unknown, this may be a compiler-generated function. Assume // it's located in the main file. auto &SM = Context.getSourceManager(); if (auto MainFile = SM.getFileEntryRefForID(SM.getMainFileID())) if (auto V = ProfileList.isFileExcluded(MainFile->getName(), Kind)) return *V; return ProfileList.getDefault(Kind); } ProfileList::ExclusionType CodeGenModule::isFunctionBlockedFromProfileInstr(llvm::Function *Fn, SourceLocation Loc) const { auto V = isFunctionBlockedByProfileList(Fn, Loc); if (V != ProfileList::Allow) return V; auto NumGroups = getCodeGenOpts().ProfileTotalFunctionGroups; if (NumGroups > 1) { auto Group = llvm::crc32(arrayRefFromStringRef(Fn->getName())) % NumGroups; if (Group != getCodeGenOpts().ProfileSelectedFunctionGroup) return ProfileList::Skip; } return ProfileList::Allow; } bool CodeGenModule::MustBeEmitted(const ValueDecl *Global) { // Never defer when EmitAllDecls is specified. if (LangOpts.EmitAllDecls) return true; const auto *VD = dyn_cast(Global); if (VD && ((CodeGenOpts.KeepPersistentStorageVariables && (VD->getStorageDuration() == SD_Static || VD->getStorageDuration() == SD_Thread)) || (CodeGenOpts.KeepStaticConsts && VD->getStorageDuration() == SD_Static && VD->getType().isConstQualified()))) return true; return getContext().DeclMustBeEmitted(Global); } bool CodeGenModule::MayBeEmittedEagerly(const ValueDecl *Global) { // In OpenMP 5.0 variables and function may be marked as // device_type(host/nohost) and we should not emit them eagerly unless we sure // that they must be emitted on the host/device. To be sure we need to have // seen a declare target with an explicit mentioning of the function, we know // we have if the level of the declare target attribute is -1. Note that we // check somewhere else if we should emit this at all. 
if (LangOpts.OpenMP >= 50 && !LangOpts.OpenMPSimd) { std::optional ActiveAttr = OMPDeclareTargetDeclAttr::getActiveAttr(Global); if (!ActiveAttr || (*ActiveAttr)->getLevel() != (unsigned)-1) return false; } if (const auto *FD = dyn_cast(Global)) { if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation) // Implicit template instantiations may change linkage if they are later // explicitly instantiated, so they should not be emitted eagerly. return false; // Defer until all versions have been semantically checked. if (FD->hasAttr() && !FD->isMultiVersion()) return false; } if (const auto *VD = dyn_cast(Global)) { if (Context.getInlineVariableDefinitionKind(VD) == ASTContext::InlineVariableDefinitionKind::WeakUnknown) // A definition of an inline constexpr static data member may change // linkage later if it's redeclared outside the class. return false; if (CXX20ModuleInits && VD->getOwningModule() && !VD->getOwningModule()->isModuleMapModule()) { // For CXX20, module-owned initializers need to be deferred, since it is // not known at this point if they will be run for the current module or // as part of the initializer for an imported one. return false; } } // If OpenMP is enabled and threadprivates must be generated like TLS, delay // codegen for global variables, because they may be marked as threadprivate. if (LangOpts.OpenMP && LangOpts.OpenMPUseTLS && getContext().getTargetInfo().isTLSSupported() && isa(Global) && !Global->getType().isConstantStorage(getContext(), false, false) && !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Global)) return false; return true; } ConstantAddress CodeGenModule::GetAddrOfMSGuidDecl(const MSGuidDecl *GD) { StringRef Name = getMangledName(GD); // The UUID descriptor should be pointer aligned. CharUnits Alignment = CharUnits::fromQuantity(PointerAlignInBytes); // Look for an existing global. if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name)) return ConstantAddress(GV, GV->getValueType(), Alignment); ConstantEmitter Emitter(*this); llvm::Constant *Init; APValue &V = GD->getAsAPValue(); if (!V.isAbsent()) { // If possible, emit the APValue version of the initializer. In particular, // this gets the type of the constant right. Init = Emitter.emitForInitializer( GD->getAsAPValue(), GD->getType().getAddressSpace(), GD->getType()); } else { // As a fallback, directly construct the constant. // FIXME: This may get padding wrong under esoteric struct layout rules. // MSVC appears to create a complete type 'struct __s_GUID' that it // presumably uses to represent these constants. 
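  // Illustrative sketch: the fallback constant built below has the shape
  //   { i32, i16, i16, [8 x i8] }
  // i.e. the classic GUID layout { Data1, Data2, Data3, Data4[8] }, so for a
  // declaration annotated with, say,
  // __declspec(uuid("12345678-1234-5678-9abc-def012345678")) the initializer
  // is the corresponding literal fields. The concrete UUID string is an
  // assumption for the example.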
MSGuidDecl::Parts Parts = GD->getParts(); llvm::Constant *Fields[4] = { llvm::ConstantInt::get(Int32Ty, Parts.Part1), llvm::ConstantInt::get(Int16Ty, Parts.Part2), llvm::ConstantInt::get(Int16Ty, Parts.Part3), llvm::ConstantDataArray::getRaw( StringRef(reinterpret_cast(Parts.Part4And5), 8), 8, Int8Ty)}; Init = llvm::ConstantStruct::getAnon(Fields); } auto *GV = new llvm::GlobalVariable( getModule(), Init->getType(), /*isConstant=*/true, llvm::GlobalValue::LinkOnceODRLinkage, Init, Name); if (supportsCOMDAT()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); setDSOLocal(GV); if (!V.isAbsent()) { Emitter.finalize(GV); return ConstantAddress(GV, GV->getValueType(), Alignment); } llvm::Type *Ty = getTypes().ConvertTypeForMem(GD->getType()); return ConstantAddress(GV, Ty, Alignment); } ConstantAddress CodeGenModule::GetAddrOfUnnamedGlobalConstantDecl( const UnnamedGlobalConstantDecl *GCD) { CharUnits Alignment = getContext().getTypeAlignInChars(GCD->getType()); llvm::GlobalVariable **Entry = nullptr; Entry = &UnnamedGlobalConstantDeclMap[GCD]; if (*Entry) return ConstantAddress(*Entry, (*Entry)->getValueType(), Alignment); ConstantEmitter Emitter(*this); llvm::Constant *Init; const APValue &V = GCD->getValue(); assert(!V.isAbsent()); Init = Emitter.emitForInitializer(V, GCD->getType().getAddressSpace(), GCD->getType()); auto *GV = new llvm::GlobalVariable(getModule(), Init->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, Init, ".constant"); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); GV->setAlignment(Alignment.getAsAlign()); Emitter.finalize(GV); *Entry = GV; return ConstantAddress(GV, GV->getValueType(), Alignment); } ConstantAddress CodeGenModule::GetAddrOfTemplateParamObject( const TemplateParamObjectDecl *TPO) { StringRef Name = getMangledName(TPO); CharUnits Alignment = getNaturalTypeAlignment(TPO->getType()); if (llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name)) return ConstantAddress(GV, GV->getValueType(), Alignment); ConstantEmitter Emitter(*this); llvm::Constant *Init = Emitter.emitForInitializer( TPO->getValue(), TPO->getType().getAddressSpace(), TPO->getType()); if (!Init) { ErrorUnsupported(TPO, "template parameter object"); return ConstantAddress::invalid(); } llvm::GlobalValue::LinkageTypes Linkage = isExternallyVisible(TPO->getLinkageAndVisibility().getLinkage()) ? llvm::GlobalValue::LinkOnceODRLinkage : llvm::GlobalValue::InternalLinkage; auto *GV = new llvm::GlobalVariable(getModule(), Init->getType(), /*isConstant=*/true, Linkage, Init, Name); setGVProperties(GV, TPO); if (supportsCOMDAT()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); Emitter.finalize(GV); return ConstantAddress(GV, GV->getValueType(), Alignment); } ConstantAddress CodeGenModule::GetWeakRefReference(const ValueDecl *VD) { const AliasAttr *AA = VD->getAttr(); assert(AA && "No alias?"); CharUnits Alignment = getContext().getDeclAlign(VD); llvm::Type *DeclTy = getTypes().ConvertTypeForMem(VD->getType()); // See if there is already something with the target's name in the module. 
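  // Illustrative sketch (assumed names): this path is reached for declarations
  // such as
  //   static int local_alias() __attribute__((weakref("real_impl")));
  // where uses of local_alias resolve to an extern_weak reference to
  // real_impl, created on demand below if the module does not already define
  // or declare that symbol.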
llvm::GlobalValue *Entry = GetGlobalValue(AA->getAliasee()); if (Entry) return ConstantAddress(Entry, DeclTy, Alignment); llvm::Constant *Aliasee; if (isa(DeclTy)) Aliasee = GetOrCreateLLVMFunction(AA->getAliasee(), DeclTy, GlobalDecl(cast(VD)), /*ForVTable=*/false); else Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, LangAS::Default, nullptr); auto *F = cast(Aliasee); F->setLinkage(llvm::Function::ExternalWeakLinkage); WeakRefReferences.insert(F); return ConstantAddress(Aliasee, DeclTy, Alignment); } template static bool hasImplicitAttr(const ValueDecl *D) { if (!D) return false; if (auto *A = D->getAttr()) return A->isImplicit(); return D->isImplicit(); } bool CodeGenModule::shouldEmitCUDAGlobalVar(const VarDecl *Global) const { assert(LangOpts.CUDA && "Should not be called by non-CUDA languages"); // We need to emit host-side 'shadows' for all global // device-side variables because the CUDA runtime needs their // size and host-side address in order to provide access to // their device-side incarnations. return !LangOpts.CUDAIsDevice || Global->hasAttr() || Global->hasAttr() || Global->hasAttr() || Global->getType()->isCUDADeviceBuiltinSurfaceType() || Global->getType()->isCUDADeviceBuiltinTextureType(); } void CodeGenModule::EmitGlobal(GlobalDecl GD) { const auto *Global = cast(GD.getDecl()); // Weak references don't produce any output by themselves. if (Global->hasAttr()) return; // If this is an alias definition (which otherwise looks like a declaration) // emit it now. if (Global->hasAttr()) return EmitAliasDefinition(GD); // IFunc like an alias whose value is resolved at runtime by calling resolver. if (Global->hasAttr()) return emitIFuncDefinition(GD); // If this is a cpu_dispatch multiversion function, emit the resolver. if (Global->hasAttr()) return emitCPUDispatchDefinition(GD); // If this is CUDA, be selective about which declarations we emit. // Non-constexpr non-lambda implicit host device functions are not emitted // unless they are used on device side. if (LangOpts.CUDA) { assert((isa(Global) || isa(Global)) && "Expected Variable or Function"); if (const auto *VD = dyn_cast(Global)) { if (!shouldEmitCUDAGlobalVar(VD)) return; } else if (LangOpts.CUDAIsDevice) { const auto *FD = dyn_cast(Global); if ((!Global->hasAttr() || (LangOpts.OffloadImplicitHostDeviceTemplates && hasImplicitAttr(FD) && hasImplicitAttr(FD) && !FD->isConstexpr() && !isLambdaCallOperator(FD) && !getContext().CUDAImplicitHostDeviceFunUsedByDevice.count(FD))) && !Global->hasAttr() && !(LangOpts.HIPStdPar && isa(Global) && !Global->hasAttr())) return; // Device-only functions are the only things we skip. } else if (!Global->hasAttr() && Global->hasAttr()) return; } if (LangOpts.OpenMP) { // If this is OpenMP, check if it is legal to emit this global normally. if (OpenMPRuntime && OpenMPRuntime->emitTargetGlobal(GD)) return; if (auto *DRD = dyn_cast(Global)) { if (MustBeEmitted(Global)) EmitOMPDeclareReduction(DRD); return; } if (auto *DMD = dyn_cast(Global)) { if (MustBeEmitted(Global)) EmitOMPDeclareMapper(DMD); return; } } // Ignore declarations, they will be emitted on their first use. if (const auto *FD = dyn_cast(Global)) { // Update deferred annotations with the latest declaration if the function // function was already used or defined. if (FD->hasAttr()) { StringRef MangledName = getMangledName(GD); if (GetGlobalValue(MangledName)) DeferredAnnotations[MangledName] = FD; } // Forward declarations are emitted lazily on first use. 
if (!FD->doesThisDeclarationHaveABody()) { if (!FD->doesDeclarationForceExternallyVisibleDefinition() && (!FD->isMultiVersion() || !getTarget().getTriple().isAArch64())) return; StringRef MangledName = getMangledName(GD); // Compute the function info and LLVM type. const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::Type *Ty = getTypes().GetFunctionType(FI); GetOrCreateLLVMFunction(MangledName, Ty, GD, /*ForVTable=*/false, /*DontDefer=*/false); return; } } else { const auto *VD = cast(Global); assert(VD->isFileVarDecl() && "Cannot emit local var decl as global."); if (VD->isThisDeclarationADefinition() != VarDecl::Definition && !Context.isMSStaticDataMemberInlineDefinition(VD)) { if (LangOpts.OpenMP) { // Emit declaration of the must-be-emitted declare target variable. if (std::optional Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { // If this variable has external storage and doesn't require special // link handling we defer to its canonical definition. if (VD->hasExternalStorage() && Res != OMPDeclareTargetDeclAttr::MT_Link) return; bool UnifiedMemoryEnabled = getOpenMPRuntime().hasRequiresUnifiedSharedMemory(); if ((*Res == OMPDeclareTargetDeclAttr::MT_To || *Res == OMPDeclareTargetDeclAttr::MT_Enter) && !UnifiedMemoryEnabled) { (void)GetAddrOfGlobalVar(VD); } else { assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || ((*Res == OMPDeclareTargetDeclAttr::MT_To || *Res == OMPDeclareTargetDeclAttr::MT_Enter) && UnifiedMemoryEnabled)) && "Link clause or to clause with unified memory expected."); (void)getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); } return; } } // If this declaration may have caused an inline variable definition to // change linkage, make sure that it's emitted. if (Context.getInlineVariableDefinitionKind(VD) == ASTContext::InlineVariableDefinitionKind::Strong) GetAddrOfGlobalVar(VD); return; } } // Defer code generation to first use when possible, e.g. if this is an inline // function. If the global must always be emitted, do it eagerly if possible // to benefit from cache locality. if (MustBeEmitted(Global) && MayBeEmittedEagerly(Global)) { // Emit the definition if it can't be deferred. EmitGlobalDefinition(GD); addEmittedDeferredDecl(GD); return; } // If we're deferring emission of a C++ variable with an // initializer, remember the order in which it appeared in the file. if (getLangOpts().CPlusPlus && isa(Global) && cast(Global)->hasInit()) { DelayedCXXInitPosition[Global] = CXXGlobalInits.size(); CXXGlobalInits.push_back(nullptr); } StringRef MangledName = getMangledName(GD); if (GetGlobalValue(MangledName) != nullptr) { // The value has already been used and should therefore be emitted. addDeferredDeclToEmit(GD); } else if (MustBeEmitted(Global)) { // The value must be emitted, but cannot be emitted eagerly. assert(!MayBeEmittedEagerly(Global)); addDeferredDeclToEmit(GD); } else { // Otherwise, remember that we saw a deferred decl with this name. The // first use of the mangled name will cause it to move into // DeferredDeclsToEmit. DeferredDecls[MangledName] = GD; } } // Check if T is a class type with a destructor that's not dllimport. 
static bool HasNonDllImportDtor(QualType T) {
  if (const auto *RT = T->getBaseElementTypeUnsafe()->getAs<RecordType>())
    if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl()))
      if (RD->getDestructor() && !RD->getDestructor()->hasAttr<DLLImportAttr>())
        return true;
  return false;
}

namespace {
struct FunctionIsDirectlyRecursive
    : public ConstStmtVisitor<FunctionIsDirectlyRecursive, bool> {
  const StringRef Name;
  const Builtin::Context &BI;
  FunctionIsDirectlyRecursive(StringRef N, const Builtin::Context &C)
      : Name(N), BI(C) {}

  bool VisitCallExpr(const CallExpr *E) {
    const FunctionDecl *FD = E->getDirectCallee();
    if (!FD)
      return false;
    AsmLabelAttr *Attr = FD->getAttr<AsmLabelAttr>();
    if (Attr && Name == Attr->getLabel())
      return true;
    unsigned BuiltinID = FD->getBuiltinID();
    if (!BuiltinID || !BI.isLibFunction(BuiltinID))
      return false;
    StringRef BuiltinName = BI.getName(BuiltinID);
    if (BuiltinName.starts_with("__builtin_") &&
        Name == BuiltinName.slice(strlen("__builtin_"), StringRef::npos)) {
      return true;
    }
    return false;
  }

  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children())
      if (Child && this->Visit(Child))
        return true;
    return false;
  }
};

// Make sure we're not referencing non-imported vars or functions.
struct DLLImportFunctionVisitor
    : public RecursiveASTVisitor<DLLImportFunctionVisitor> {
  bool SafeToInline = true;

  bool shouldVisitImplicitCode() const { return true; }

  bool VisitVarDecl(VarDecl *VD) {
    if (VD->getTLSKind()) {
      // A thread-local variable cannot be imported.
      SafeToInline = false;
      return SafeToInline;
    }

    // A variable definition might imply a destructor call.
    if (VD->isThisDeclarationADefinition())
      SafeToInline = !HasNonDllImportDtor(VD->getType());

    return SafeToInline;
  }

  bool VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) {
    if (const auto *D = E->getTemporary()->getDestructor())
      SafeToInline = D->hasAttr<DLLImportAttr>();
    return SafeToInline;
  }

  bool VisitDeclRefExpr(DeclRefExpr *E) {
    ValueDecl *VD = E->getDecl();
    if (isa<FunctionDecl>(VD))
      SafeToInline = VD->hasAttr<DLLImportAttr>();
    else if (VarDecl *V = dyn_cast<VarDecl>(VD))
      SafeToInline = !V->hasGlobalStorage() || V->hasAttr<DLLImportAttr>();
    return SafeToInline;
  }

  bool VisitCXXConstructExpr(CXXConstructExpr *E) {
    SafeToInline = E->getConstructor()->hasAttr<DLLImportAttr>();
    return SafeToInline;
  }

  bool VisitCXXMemberCallExpr(CXXMemberCallExpr *E) {
    CXXMethodDecl *M = E->getMethodDecl();
    if (!M) {
      // Call through a pointer to member function. This is safe to inline.
      SafeToInline = true;
    } else {
      SafeToInline = M->hasAttr<DLLImportAttr>();
    }
    return SafeToInline;
  }

  bool VisitCXXDeleteExpr(CXXDeleteExpr *E) {
    SafeToInline = E->getOperatorDelete()->hasAttr<DLLImportAttr>();
    return SafeToInline;
  }

  bool VisitCXXNewExpr(CXXNewExpr *E) {
    SafeToInline = E->getOperatorNew()->hasAttr<DLLImportAttr>();
    return SafeToInline;
  }
};
} // namespace

// isTriviallyRecursive - Check if this function calls another
// decl that, because of the asm attribute or the other decl being a builtin,
// ends up pointing to itself.
bool CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) {
  StringRef Name;
  if (getCXXABI().getMangleContext().shouldMangleDeclName(FD)) {
    // asm labels are a special kind of mangling we have to support.
    AsmLabelAttr *Attr = FD->getAttr<AsmLabelAttr>();
    if (!Attr)
      return false;
    Name = Attr->getLabel();
  } else {
    Name = FD->getName();
  }

  FunctionIsDirectlyRecursive Walker(Name, Context.BuiltinInfo);
  const Stmt *Body = FD->getBody();
  return Body ?
Walker.Visit(Body) : false; } bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage) return true; const auto *F = cast(GD.getDecl()); if (CodeGenOpts.OptimizationLevel == 0 && !F->hasAttr()) return false; // We don't import function bodies from other named module units since that // behavior may break ABI compatibility of the current unit. if (const Module *M = F->getOwningModule(); M && M->getTopLevelModule()->isNamedModule() && getContext().getCurrentNamedModule() != M->getTopLevelModule()) { // There are practices to mark template member function as always-inline // and mark the template as extern explicit instantiation but not give // the definition for member function. So we have to emit the function // from explicitly instantiation with always-inline. // // See https://github.com/llvm/llvm-project/issues/86893 for details. // // TODO: Maybe it is better to give it a warning if we call a non-inline // function from other module units which is marked as always-inline. if (!F->isTemplateInstantiation() || !F->hasAttr()) { return false; } } if (F->hasAttr()) return false; if (F->hasAttr() && !F->hasAttr()) { // Check whether it would be safe to inline this dllimport function. DLLImportFunctionVisitor Visitor; Visitor.TraverseFunctionDecl(const_cast(F)); if (!Visitor.SafeToInline) return false; if (const CXXDestructorDecl *Dtor = dyn_cast(F)) { // Implicit destructor invocations aren't captured in the AST, so the // check above can't see them. Check for them manually here. for (const Decl *Member : Dtor->getParent()->decls()) if (isa(Member)) if (HasNonDllImportDtor(cast(Member)->getType())) return false; for (const CXXBaseSpecifier &B : Dtor->getParent()->bases()) if (HasNonDllImportDtor(B.getType())) return false; } } // Inline builtins declaration must be emitted. They often are fortified // functions. if (F->isInlineBuiltinDeclaration()) return true; // PR9614. Avoid cases where the source code is lying to us. An available // externally function should have an equivalent function somewhere else, // but a function that calls itself through asm label/`__builtin_` trickery is // clearly not equivalent to the real implementation. // This happens in glibc's btowc and in some configure checks. return !isTriviallyRecursive(F); } bool CodeGenModule::shouldOpportunisticallyEmitVTables() { return CodeGenOpts.OptimizationLevel > 0; } void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *FD = cast(GD.getDecl()); if (FD->isCPUSpecificMultiVersion()) { auto *Spec = FD->getAttr(); for (unsigned I = 0; I < Spec->cpus_size(); ++I) EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); } else if (auto *TC = FD->getAttr()) { for (unsigned I = 0; I < TC->featuresStrs_size(); ++I) // AArch64 favors the default target version over the clone if any. if ((!TC->isDefaultVersion(I) || !getTarget().getTriple().isAArch64()) && TC->isFirstOfVersion(I)) EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr); // Ensure that the resolver function is also emitted. GetOrCreateMultiVersionResolver(GD); } else EmitGlobalFunctionDefinition(GD, GV); // Defer the resolver emission until we can reason whether the TU // contains a default target version implementation. 
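//===--- Illustrative aside (not part of the original source) -------------===//
// A hypothetical header pattern, modelled loosely on glibc's fortified
// wrappers, of the situation isTriviallyRecursive() guards against in
// shouldEmitFunction(): 'fast_get' looks like an available_externally body
// that could be emitted, but its only call is redirected by the asm label
// back to the symbol "fast_get" itself, so it is no real fallback. Names are
// invented for the sketch.
extern "C" {
int fast_get_alias(int) __asm__("fast_get");
__attribute__((always_inline)) inline int fast_get(int x) {
  return fast_get_alias(x); // after asm-label renaming, this calls "fast_get"
}
}
//===----------------------------------------------------------------------===//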
if (FD->isTargetVersionMultiVersion()) AddDeferredMultiVersionResolverToEmit(GD); } void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast(GD.getDecl()); PrettyStackTraceDecl CrashInfo(const_cast(D), D->getLocation(), Context.getSourceManager(), "Generating code for declaration"); if (const auto *FD = dyn_cast(D)) { // At -O0, don't generate IR for functions with available_externally // linkage. if (!shouldEmitFunction(GD)) return; llvm::TimeTraceScope TimeScope("CodeGen Function", [&]() { std::string Name; llvm::raw_string_ostream OS(Name); FD->getNameForDiagnostic(OS, getContext().getPrintingPolicy(), /*Qualified=*/true); return Name; }); if (const auto *Method = dyn_cast(D)) { // Make sure to emit the definition(s) before we emit the thunks. // This is necessary for the generation of certain thunks. if (isa(Method) || isa(Method)) ABI->emitCXXStructor(GD); else if (FD->isMultiVersion()) EmitMultiVersionFunctionDefinition(GD, GV); else EmitGlobalFunctionDefinition(GD, GV); if (Method->isVirtual()) getVTables().EmitThunks(GD); return; } if (FD->isMultiVersion()) return EmitMultiVersionFunctionDefinition(GD, GV); return EmitGlobalFunctionDefinition(GD, GV); } if (const auto *VD = dyn_cast(D)) return EmitGlobalVarDefinition(VD, !VD->hasDefinition()); llvm_unreachable("Invalid argument to EmitGlobalDefinition()"); } static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn); static unsigned TargetMVPriority(const TargetInfo &TI, const CodeGenFunction::MultiVersionResolverOption &RO) { unsigned Priority = 0; unsigned NumFeatures = 0; for (StringRef Feat : RO.Conditions.Features) { Priority = std::max(Priority, TI.multiVersionSortPriority(Feat)); NumFeatures++; } if (!RO.Conditions.Architecture.empty()) Priority = std::max( Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture)); Priority += TI.multiVersionFeatureCost() * NumFeatures; return Priority; } // Multiversion functions should be at most 'WeakODRLinkage' so that a different // TU can forward declare the function without causing problems. Particularly // in the cases of CPUDispatch, this causes issues. This also makes sure we // work with internal linkage functions, so that the same function name can be // used with internal linkage in multiple TUs. llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM, GlobalDecl GD) { const FunctionDecl *FD = cast(GD.getDecl()); if (FD->getFormalLinkage() == Linkage::Internal) return llvm::GlobalValue::InternalLinkage; return llvm::GlobalValue::WeakODRLinkage; } void CodeGenModule::emitMultiVersionFunctions() { std::vector MVFuncsToEmit; MultiVersionFuncs.swap(MVFuncsToEmit); for (GlobalDecl GD : MVFuncsToEmit) { const auto *FD = cast(GD.getDecl()); assert(FD && "Expected a FunctionDecl"); auto createFunction = [&](const FunctionDecl *Decl, unsigned MVIdx = 0) { GlobalDecl CurGD{Decl->isDefined() ? 
Decl->getDefinition() : Decl, MVIdx}; StringRef MangledName = getMangledName(CurGD); llvm::Constant *Func = GetGlobalValue(MangledName); if (!Func) { if (Decl->isDefined()) { EmitGlobalFunctionDefinition(CurGD, nullptr); Func = GetGlobalValue(MangledName); } else { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(CurGD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); Func = GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, ForDefinition); } assert(Func && "This should have just been created"); } return cast(Func); }; // For AArch64, a resolver is only emitted if a function marked with // target_version("default")) or target_clones() is present and defined // in this TU. For other architectures it is always emitted. bool ShouldEmitResolver = !getTarget().getTriple().isAArch64(); SmallVector Options; getContext().forEachMultiversionedFunctionVersion( FD, [&](const FunctionDecl *CurFD) { llvm::SmallVector Feats; bool IsDefined = CurFD->doesThisDeclarationHaveABody(); if (const auto *TA = CurFD->getAttr()) { TA->getAddedFeatures(Feats); llvm::Function *Func = createFunction(CurFD); Options.emplace_back(Func, TA->getArchitecture(), Feats); } else if (const auto *TVA = CurFD->getAttr()) { if (TVA->isDefaultVersion() && IsDefined) ShouldEmitResolver = true; TVA->getFeatures(Feats); llvm::Function *Func = createFunction(CurFD); Options.emplace_back(Func, /*Architecture*/ "", Feats); } else if (const auto *TC = CurFD->getAttr()) { if (IsDefined) ShouldEmitResolver = true; for (unsigned I = 0; I < TC->featuresStrs_size(); ++I) { if (!TC->isFirstOfVersion(I)) continue; llvm::Function *Func = createFunction(CurFD, I); StringRef Architecture; Feats.clear(); if (getTarget().getTriple().isAArch64()) TC->getFeatures(Feats, I); else { StringRef Version = TC->getFeatureStr(I); if (Version.starts_with("arch=")) Architecture = Version.drop_front(sizeof("arch=") - 1); else if (Version != "default") Feats.push_back(Version); } Options.emplace_back(Func, Architecture, Feats); } } else llvm_unreachable("unexpected MultiVersionKind"); }); if (!ShouldEmitResolver) continue; llvm::Constant *ResolverConstant = GetOrCreateMultiVersionResolver(GD); if (auto *IFunc = dyn_cast(ResolverConstant)) { ResolverConstant = IFunc->getResolver(); if (FD->isTargetClonesMultiVersion() && !getTarget().getTriple().isAArch64()) { std::string MangledName = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); if (!GetGlobalValue(MangledName + ".ifunc")) { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI); // In prior versions of Clang, the mangling for ifuncs incorrectly // included an .ifunc suffix. This alias is generated for backward // compatibility. It is deprecated, and may be removed in the future. 
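//===--- Illustrative aside (not part of the original source) -------------===//
// Hypothetical x86 user code for two of the multiversioning styles walked
// above. target_clones emits one clone per entry plus a resolver/ifunc; the
// 'target' form emits one function per annotated definition. target_version
// (not shown) is the AArch64 flavour whose resolver is only emitted once a
// "default" definition is seen in the TU, as the ShouldEmitResolver logic
// above describes. Function names are invented for the sketch.
__attribute__((target_clones("avx2", "sse4.2", "default")))
int dot(const int *a, const int *b, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += a[i] * b[i];
  return s;
}

__attribute__((target("avx2"))) int mix(int x) { return x * 3; }
__attribute__((target("default"))) int mix(int x) { return x * 2; }
//===----------------------------------------------------------------------===//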
auto *Alias = llvm::GlobalAlias::create( DeclTy, 0, getMultiversionLinkage(*this, GD), MangledName + ".ifunc", IFunc, &getModule()); SetCommonAttributes(FD, Alias); } } } llvm::Function *ResolverFunc = cast(ResolverConstant); ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD)); if (!ResolverFunc->hasLocalLinkage() && supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); const TargetInfo &TI = getTarget(); llvm::stable_sort( Options, [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, const CodeGenFunction::MultiVersionResolverOption &RHS) { return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS); }); CodeGenFunction CGF(*this); CGF.EmitMultiVersionResolver(ResolverFunc, Options); } // Ensure that any additions to the deferred decls list caused by emitting a // variant are emitted. This can happen when the variant itself is inline and // calls a function without linkage. if (!MVFuncsToEmit.empty()) EmitDeferred(); // Ensure that any additions to the multiversion funcs list from either the // deferred decls or the multiversion functions themselves are emitted. if (!MultiVersionFuncs.empty()) emitMultiVersionFunctions(); } static void replaceDeclarationWith(llvm::GlobalValue *Old, llvm::Constant *New) { assert(cast(Old)->isDeclaration() && "Not a declaration"); New->takeName(Old); Old->replaceAllUsesWith(New); Old->eraseFromParent(); } void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { const auto *FD = cast(GD.getDecl()); assert(FD && "Not a FunctionDecl?"); assert(FD->isCPUDispatchMultiVersion() && "Not a multiversion function?"); const auto *DD = FD->getAttr(); assert(DD && "Not a cpu_dispatch Function?"); const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI); StringRef ResolverName = getMangledName(GD); UpdateMultiVersionNames(GD, FD, ResolverName); llvm::Type *ResolverType; GlobalDecl ResolverGD; if (getTarget().supportsIFunc()) { ResolverType = llvm::FunctionType::get( llvm::PointerType::get(DeclTy, getTypes().getTargetAddressSpace(FD->getType())), false); } else { ResolverType = DeclTy; ResolverGD = GD; } auto *ResolverFunc = cast(GetOrCreateLLVMFunction( ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false)); ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD)); if (supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); SmallVector Options; const TargetInfo &Target = getTarget(); unsigned Index = 0; for (const IdentifierInfo *II : DD->cpus()) { // Get the name of the target function so we can look it up/create it. 
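//===--- Illustrative aside (not part of the original source) -------------===//
// Hypothetical cpu_specific / cpu_dispatch user code (the ICC-compatible
// style handled by emitCPUDispatchDefinition()). Each cpu_specific definition
// becomes one candidate; the bodiless cpu_dispatch declaration is where the
// resolver (and, where supported, the ifunc) is emitted. Assumes an x86
// target; names and CPU choices are just for the sketch.
__attribute__((cpu_specific(generic))) int popcnt32(unsigned x) {
  int n = 0;
  for (; x; x &= x - 1)
    ++n;
  return n;
}
__attribute__((cpu_specific(core_4th_gen_avx))) int popcnt32(unsigned x) {
  return __builtin_popcount(x);
}
__attribute__((cpu_dispatch(generic, core_4th_gen_avx))) int popcnt32(unsigned x);
//===----------------------------------------------------------------------===//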
std::string MangledName = getMangledNameImpl(*this, GD, FD, true) + getCPUSpecificMangling(*this, II->getName()); llvm::Constant *Func = GetGlobalValue(MangledName); if (!Func) { GlobalDecl ExistingDecl = Manglings.lookup(MangledName); if (ExistingDecl.getDecl() && ExistingDecl.getDecl()->getAsFunction()->isDefined()) { EmitGlobalFunctionDefinition(ExistingDecl, nullptr); Func = GetGlobalValue(MangledName); } else { if (!ExistingDecl.getDecl()) ExistingDecl = GD.getWithMultiVersionIndex(Index); Func = GetOrCreateLLVMFunction( MangledName, DeclTy, ExistingDecl, /*ForVTable=*/false, /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(), ForDefinition); } } llvm::SmallVector Features; Target.getCPUSpecificCPUDispatchFeatures(II->getName(), Features); llvm::transform(Features, Features.begin(), [](StringRef Str) { return Str.substr(1); }); llvm::erase_if(Features, [&Target](StringRef Feat) { return !Target.validateCpuSupports(Feat); }); Options.emplace_back(cast(Func), StringRef{}, Features); ++Index; } llvm::stable_sort( Options, [](const CodeGenFunction::MultiVersionResolverOption &LHS, const CodeGenFunction::MultiVersionResolverOption &RHS) { return llvm::X86::getCpuSupportsMask(LHS.Conditions.Features) > llvm::X86::getCpuSupportsMask(RHS.Conditions.Features); }); // If the list contains multiple 'default' versions, such as when it contains // 'pentium' and 'generic', don't emit the call to the generic one (since we // always run on at least a 'pentium'). We do this by deleting the 'least // advanced' (read, lowest mangling letter). while (Options.size() > 1 && llvm::all_of(llvm::X86::getCpuSupportsMask( (Options.end() - 2)->Conditions.Features), [](auto X) { return X == 0; })) { StringRef LHSName = (Options.end() - 2)->Function->getName(); StringRef RHSName = (Options.end() - 1)->Function->getName(); if (LHSName.compare(RHSName) < 0) Options.erase(Options.end() - 2); else Options.erase(Options.end() - 1); } CodeGenFunction CGF(*this); CGF.EmitMultiVersionResolver(ResolverFunc, Options); if (getTarget().supportsIFunc()) { llvm::GlobalValue::LinkageTypes Linkage = getMultiversionLinkage(*this, GD); auto *IFunc = cast(GetOrCreateMultiVersionResolver(GD)); // Fix up function declarations that were created for cpu_specific before // cpu_dispatch was known if (!isa(IFunc)) { auto *GI = llvm::GlobalIFunc::create(DeclTy, 0, Linkage, "", ResolverFunc, &getModule()); replaceDeclarationWith(IFunc, GI); IFunc = GI; } std::string AliasName = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); llvm::Constant *AliasFunc = GetGlobalValue(AliasName); if (!AliasFunc) { auto *GA = llvm::GlobalAlias::create(DeclTy, 0, Linkage, AliasName, IFunc, &getModule()); SetCommonAttributes(GD, GA); } } } /// Adds a declaration to the list of multi version functions if not present. void CodeGenModule::AddDeferredMultiVersionResolverToEmit(GlobalDecl GD) { const auto *FD = cast(GD.getDecl()); assert(FD && "Not a FunctionDecl?"); if (FD->isTargetVersionMultiVersion() || FD->isTargetClonesMultiVersion()) { std::string MangledName = getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); if (!DeferredResolversToEmit.insert(MangledName).second) return; } MultiVersionFuncs.push_back(GD); } /// If a dispatcher for the specified mangled name is not in the module, create /// and return it. The dispatcher is either an llvm Function with the specified /// type, or a global ifunc. 
llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { const auto *FD = cast(GD.getDecl()); assert(FD && "Not a FunctionDecl?"); std::string MangledName = getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); // Holds the name of the resolver, in ifunc mode this is the ifunc (which has // a separate resolver). std::string ResolverName = MangledName; if (getTarget().supportsIFunc()) { switch (FD->getMultiVersionKind()) { case MultiVersionKind::None: llvm_unreachable("unexpected MultiVersionKind::None for resolver"); case MultiVersionKind::Target: case MultiVersionKind::CPUSpecific: case MultiVersionKind::CPUDispatch: ResolverName += ".ifunc"; break; case MultiVersionKind::TargetClones: case MultiVersionKind::TargetVersion: break; } } else if (FD->isTargetMultiVersion()) { ResolverName += ".resolver"; } // If the resolver has already been created, just return it. This lookup may // yield a function declaration instead of a resolver on AArch64. That is // because we didn't know whether a resolver will be generated when we first // encountered a use of the symbol named after this resolver. Therefore, // targets which support ifuncs should not return here unless we actually // found an ifunc. llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName); if (ResolverGV && (isa(ResolverGV) || !getTarget().supportsIFunc())) return ResolverGV; const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI); // The resolver needs to be created. For target and target_clones, defer // creation until the end of the TU. if (FD->isTargetMultiVersion() || FD->isTargetClonesMultiVersion()) AddDeferredMultiVersionResolverToEmit(GD); // For cpu_specific, don't create an ifunc yet because we don't know if the // cpu_dispatch will be emitted in this translation unit. if (getTarget().supportsIFunc() && !FD->isCPUSpecificMultiVersion()) { llvm::Type *ResolverType = llvm::FunctionType::get( llvm::PointerType::get(DeclTy, getTypes().getTargetAddressSpace(FD->getType())), false); llvm::Constant *Resolver = GetOrCreateLLVMFunction( MangledName + ".resolver", ResolverType, GlobalDecl{}, /*ForVTable=*/false); llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(DeclTy, 0, getMultiversionLinkage(*this, GD), "", Resolver, &getModule()); GIF->setName(ResolverName); SetCommonAttributes(FD, GIF); if (ResolverGV) replaceDeclarationWith(ResolverGV, GIF); return GIF; } llvm::Constant *Resolver = GetOrCreateLLVMFunction( ResolverName, DeclTy, GlobalDecl{}, /*ForVTable=*/false); assert(isa(Resolver) && "Resolver should be created for the first time"); SetCommonAttributes(FD, cast(Resolver)); if (ResolverGV) replaceDeclarationWith(ResolverGV, Resolver); return Resolver; } bool CodeGenModule::shouldDropDLLAttribute(const Decl *D, const llvm::GlobalValue *GV) const { auto SC = GV->getDLLStorageClass(); if (SC == llvm::GlobalValue::DefaultStorageClass) return false; const Decl *MRD = D->getMostRecentDecl(); return (((SC == llvm::GlobalValue::DLLImportStorageClass && !MRD->hasAttr()) || (SC == llvm::GlobalValue::DLLExportStorageClass && !MRD->hasAttr())) && !shouldMapVisibilityToDLLExport(cast(MRD))); } /// GetOrCreateLLVMFunction - If the specified mangled name is not in the /// module, create and return an llvm Function with the specified type. If there /// is something in the module with the specified name, return it potentially /// bitcasted to the right type. 
/// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the function when it is first created. llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( StringRef MangledName, llvm::Type *Ty, GlobalDecl GD, bool ForVTable, bool DontDefer, bool IsThunk, llvm::AttributeList ExtraAttrs, ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); std::string NameWithoutMultiVersionMangling; // Any attempts to use a MultiVersion function should result in retrieving // the iFunc instead. Name Mangling will handle the rest of the changes. if (const FunctionDecl *FD = cast_or_null(D)) { // For the device mark the function as one that should be emitted. if (getLangOpts().OpenMPIsTargetDevice && OpenMPRuntime && !OpenMPRuntime->markAsGlobalTarget(GD) && FD->isDefined() && !DontDefer && !IsForDefinition) { if (const FunctionDecl *FDDef = FD->getDefinition()) { GlobalDecl GDDef; if (const auto *CD = dyn_cast(FDDef)) GDDef = GlobalDecl(CD, GD.getCtorType()); else if (const auto *DD = dyn_cast(FDDef)) GDDef = GlobalDecl(DD, GD.getDtorType()); else GDDef = GlobalDecl(FDDef); EmitGlobal(GDDef); } } if (FD->isMultiVersion()) { UpdateMultiVersionNames(GD, FD, MangledName); if (!IsForDefinition) { // On AArch64 we do not immediatelly emit an ifunc resolver when a // function is used. Instead we defer the emission until we see a // default definition. In the meantime we just reference the symbol // without FMV mangling (it may or may not be replaced later). if (getTarget().getTriple().isAArch64()) { AddDeferredMultiVersionResolverToEmit(GD); NameWithoutMultiVersionMangling = getMangledNameImpl( *this, GD, FD, /*OmitMultiVersionMangling=*/true); } else return GetOrCreateMultiVersionResolver(GD); } } } if (!NameWithoutMultiVersionMangling.empty()) MangledName = NameWithoutMultiVersionMangling; // Lookup the entry, lazily creating it if necessary. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry) { if (WeakRefReferences.erase(Entry)) { const FunctionDecl *FD = cast_or_null(D); if (FD && !FD->hasAttr()) Entry->setLinkage(llvm::Function::ExternalLinkage); } // Handle dropped DLL attributes. if (D && shouldDropDLLAttribute(D, Entry)) { Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); setDSOLocal(Entry); } // If there are two attempts to define the same mangled name, issue an // error. if (IsForDefinition && !Entry->isDeclaration()) { GlobalDecl OtherGD; // Check that GD is not yet in DiagnosedConflictingDefinitions is required // to make sure that we issue an error only once. if (lookupRepresentativeDecl(MangledName, OtherGD) && (GD.getCanonicalDecl().getDecl() != OtherGD.getCanonicalDecl().getDecl()) && DiagnosedConflictingDefinitions.insert(GD).second) { getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name) << MangledName; getDiags().Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } } if ((isa(Entry) || isa(Entry)) && (Entry->getValueType() == Ty)) { return Entry; } // Make sure the result is of the correct type. // (If function is requested for a definition, we always need to create a new // function, not just return a bitcast.) if (!IsForDefinition) return Entry; } // This function doesn't have a complete type (for example, the return // type is an incomplete struct). Use a fake type instead, and make // sure not to try to set attributes. 
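//===--- Illustrative aside (not part of the original source) -------------===//
// Hypothetical code that reaches the err_duplicate_mangled_name diagnostic
// handled above: two unrelated definitions are forced onto the same symbol
// with asm labels, so the second definition finds a non-declaration
// GlobalValue already registered under its mangled name.
int first(int) __asm__("shared_symbol");
int first(int x) { return x; }

int second(int) __asm__("shared_symbol");
int second(int x) { return x + 1; } // error: duplicate mangled name
//===----------------------------------------------------------------------===//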
bool IsIncompleteFunction = false; llvm::FunctionType *FTy; if (isa(Ty)) { FTy = cast(Ty); } else { FTy = llvm::FunctionType::get(VoidTy, false); IsIncompleteFunction = true; } llvm::Function *F = llvm::Function::Create(FTy, llvm::Function::ExternalLinkage, Entry ? StringRef() : MangledName, &getModule()); // Store the declaration associated with this function so it is potentially // updated by further declarations or definitions and emitted at the end. if (D && D->hasAttr()) DeferredAnnotations[MangledName] = cast(D); // If we already created a function with the same mangled name (but different // type) before, take its name and add it to the list of functions to be // replaced with F at the end of CodeGen. // // This happens if there is a prototype for a function (e.g. "int f()") and // then a definition of a different type (e.g. "int f(int x)"). if (Entry) { F->takeName(Entry); // This might be an implementation of a function without a prototype, in // which case, try to do special replacement of calls which match the new // prototype. The really key thing here is that we also potentially drop // arguments from the call site so as to make a direct call, which makes the // inliner happier and suppresses a number of optimizer warnings (!) about // dropping arguments. if (!Entry->use_empty()) { ReplaceUsesOfNonProtoTypeWithRealFunction(Entry, F); Entry->removeDeadConstantUsers(); } addGlobalValReplacement(Entry, F); } assert(F->getName() == MangledName && "name was uniqued!"); if (D) SetFunctionAttributes(GD, F, IsIncompleteFunction, IsThunk); if (ExtraAttrs.hasFnAttrs()) { llvm::AttrBuilder B(F->getContext(), ExtraAttrs.getFnAttrs()); F->addFnAttrs(B); } if (!DontDefer) { // All MSVC dtors other than the base dtor are linkonce_odr and delegate to // each other bottoming out with the base dtor. Therefore we emit non-base // dtors on usage, even if there is no dtor definition in the TU. if (isa_and_nonnull(D) && getCXXABI().useThunkForDtorVariant(cast(D), GD.getDtorType())) addDeferredDeclToEmit(GD); // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end // of the file. auto DDI = DeferredDecls.find(MangledName); if (DDI != DeferredDecls.end()) { // Move the potentially referenced deferred decl to the // DeferredDeclsToEmit list, and remove it from DeferredDecls (since we // don't need it anymore). addDeferredDeclToEmit(DDI->second); DeferredDecls.erase(DDI); // Otherwise, there are cases we have to worry about where we're // using a declaration for which we must emit a definition but where // we might not find a top-level definition: // - member functions defined inline in their classes // - friend functions defined inline in some class // - special member functions with implicit definitions // If we ever change our AST traversal to walk into class methods, // this will be unnecessary. // // We also don't emit a definition for a function if it's going to be an // entry in a vtable, unless it's already marked as used. } else if (getLangOpts().CPlusPlus && D) { // Look for a declaration that's lexically in a record. for (const auto *FD = cast(D)->getMostRecentDecl(); FD; FD = FD->getPreviousDecl()) { if (isa(FD->getLexicalDeclContext())) { if (FD->doesThisDeclarationHaveABody()) { addDeferredDeclToEmit(GD.getWithDecl(FD)); break; } } } } } // Make sure the result is of the requested type. 
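//===--- Illustrative aside (not part of the original source) -------------===//
// A deliberately simplified, self-contained model of the DeferredDecls to
// DeferredDeclsToEmit hand-off used above: a deferrable global is parked
// under its mangled name and only promoted to the emission work list on the
// first use of that name. All names and types here are invented for the
// sketch; the real bookkeeping lives in CodeGenModule.
#include <map>
#include <string>
#include <vector>

namespace toy {
struct DeferralModel {
  std::map<std::string, int> DeferredDecls; // seen, but not yet needed
  std::vector<int> DeferredDeclsToEmit;     // must be emitted at end of TU

  // Called when a definition is seen but emission can be deferred.
  void noteDeferrable(const std::string &MangledName, int DeclID) {
    DeferredDecls[MangledName] = DeclID;
  }

  // Called on the first use or definition of a mangled name: promote any
  // matching deferred decl onto the emission work list.
  void noteFirstUse(const std::string &MangledName) {
    auto It = DeferredDecls.find(MangledName);
    if (It != DeferredDecls.end()) {
      DeferredDeclsToEmit.push_back(It->second);
      DeferredDecls.erase(It);
    }
  }
};
} // namespace toy
//===----------------------------------------------------------------------===//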
if (!IsIncompleteFunction) { assert(F->getFunctionType() == Ty); return F; } return F; } /// GetAddrOfFunction - Return the address of the given function. If Ty is /// non-null, then this function will use the specified type if it has to /// create it (this occurs when we see a definition of the function). llvm::Constant * CodeGenModule::GetAddrOfFunction(GlobalDecl GD, llvm::Type *Ty, bool ForVTable, bool DontDefer, ForDefinition_t IsForDefinition) { // If there was no specific requested type, just convert it now. if (!Ty) { const auto *FD = cast(GD.getDecl()); Ty = getTypes().ConvertType(FD->getType()); } // Devirtualized destructor calls may come through here instead of via // getAddrOfCXXStructor. Make sure we use the MS ABI base destructor instead // of the complete destructor when necessary. if (const auto *DD = dyn_cast(GD.getDecl())) { if (getTarget().getCXXABI().isMicrosoft() && GD.getDtorType() == Dtor_Complete && DD->getParent()->getNumVBases() == 0) GD = GlobalDecl(DD, Dtor_Base); } StringRef MangledName = getMangledName(GD); auto *F = GetOrCreateLLVMFunction(MangledName, Ty, GD, ForVTable, DontDefer, /*IsThunk=*/false, llvm::AttributeList(), IsForDefinition); // Returns kernel handle for HIP kernel stub function. if (LangOpts.CUDA && !LangOpts.CUDAIsDevice && cast(GD.getDecl())->hasAttr()) { auto *Handle = getCUDARuntime().getKernelHandle( cast(F->stripPointerCasts()), GD); if (IsForDefinition) return F; return Handle; } return F; } llvm::Constant *CodeGenModule::GetFunctionStart(const ValueDecl *Decl) { llvm::GlobalValue *F = cast(GetAddrOfFunction(Decl)->stripPointerCasts()); return llvm::NoCFIValue::get(F); } static const FunctionDecl * GetRuntimeFunctionDecl(ASTContext &C, StringRef Name) { TranslationUnitDecl *TUDecl = C.getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); IdentifierInfo &CII = C.Idents.get(Name); for (const auto *Result : DC->lookup(&CII)) if (const auto *FD = dyn_cast(Result)) return FD; if (!C.getLangOpts().CPlusPlus) return nullptr; // Demangle the premangled name from getTerminateFn() IdentifierInfo &CXXII = (Name == "_ZSt9terminatev" || Name == "?terminate@@YAXXZ") ? C.Idents.get("terminate") : C.Idents.get(Name); for (const auto &N : {"__cxxabiv1", "std"}) { IdentifierInfo &NS = C.Idents.get(N); for (const auto *Result : DC->lookup(&NS)) { const NamespaceDecl *ND = dyn_cast(Result); if (auto *LSD = dyn_cast(Result)) for (const auto *Result : LSD->lookup(&NS)) if ((ND = dyn_cast(Result))) break; if (ND) for (const auto *Result : ND->lookup(&CXXII)) if (const auto *FD = dyn_cast(Result)) return FD; } } return nullptr; } /// CreateRuntimeFunction - Create a new runtime function with the specified /// type and name. llvm::FunctionCallee CodeGenModule::CreateRuntimeFunction(llvm::FunctionType *FTy, StringRef Name, llvm::AttributeList ExtraAttrs, bool Local, bool AssumeConvergent) { if (AssumeConvergent) { ExtraAttrs = ExtraAttrs.addFnAttribute(VMContext, llvm::Attribute::Convergent); } llvm::Constant *C = GetOrCreateLLVMFunction(Name, FTy, GlobalDecl(), /*ForVTable=*/false, /*DontDefer=*/false, /*IsThunk=*/false, ExtraAttrs); if (auto *F = dyn_cast(C)) { if (F->empty()) { F->setCallingConv(getRuntimeCC()); // In Windows Itanium environments, try to mark runtime functions // dllimport. For Mingw and MSVC, don't. We don't really know if the user // will link their standard library statically or dynamically. Marking // functions imported when they are not imported can cause linker errors // and warnings. 
if (!Local && getTriple().isWindowsItaniumEnvironment() && !getCodeGenOpts().LTOVisibilityPublicStd) { const FunctionDecl *FD = GetRuntimeFunctionDecl(Context, Name); if (!FD || FD->hasAttr()) { F->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); F->setLinkage(llvm::GlobalValue::ExternalLinkage); } } setDSOLocal(F); // FIXME: We should use CodeGenModule::SetLLVMFunctionAttributes() instead // of trying to approximate the attributes using the LLVM function // signature. This requires revising the API of CreateRuntimeFunction(). markRegisterParameterAttributes(F); } } return {FTy, C}; } /// GetOrCreateLLVMGlobal - If the specified mangled name is not in the module, /// create and return an llvm GlobalVariable with the specified type and address /// space. If there is something in the module with the specified name, return /// it potentially bitcasted to the right type. /// /// If D is non-null, it specifies a decl that correspond to this. This is used /// to set the attributes on the global when it is first created. /// /// If IsForDefinition is true, it is guaranteed that an actual global with /// type Ty will be returned, not conversion of a variable with the same /// mangled name but some other type. llvm::Constant * CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty, LangAS AddrSpace, const VarDecl *D, ForDefinition_t IsForDefinition) { // Lookup the entry, lazily creating it if necessary. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); unsigned TargetAS = getContext().getTargetAddressSpace(AddrSpace); if (Entry) { if (WeakRefReferences.erase(Entry)) { if (D && !D->hasAttr()) Entry->setLinkage(llvm::Function::ExternalLinkage); } // Handle dropped DLL attributes. if (D && shouldDropDLLAttribute(D, Entry)) Entry->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass); if (LangOpts.OpenMP && !LangOpts.OpenMPSimd && D) getOpenMPRuntime().registerTargetGlobalVariable(D, Entry); if (Entry->getValueType() == Ty && Entry->getAddressSpace() == TargetAS) return Entry; // If there are two attempts to define the same mangled name, issue an // error. if (IsForDefinition && !Entry->isDeclaration()) { GlobalDecl OtherGD; const VarDecl *OtherD; // Check that D is not yet in DiagnosedConflictingDefinitions is required // to make sure that we issue an error only once. if (D && lookupRepresentativeDecl(MangledName, OtherGD) && (D->getCanonicalDecl() != OtherGD.getCanonicalDecl().getDecl()) && (OtherD = dyn_cast(OtherGD.getDecl())) && OtherD->hasInit() && DiagnosedConflictingDefinitions.insert(D).second) { getDiags().Report(D->getLocation(), diag::err_duplicate_mangled_name) << MangledName; getDiags().Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } } // Make sure the result is of the correct type. if (Entry->getType()->getAddressSpace() != TargetAS) return llvm::ConstantExpr::getAddrSpaceCast( Entry, llvm::PointerType::get(Ty->getContext(), TargetAS)); // (If global is requested for a definition, we always need to create a new // global, not just return a bitcast.) if (!IsForDefinition) return Entry; } auto DAddrSpace = GetGlobalVarAddressSpace(D); auto *GV = new llvm::GlobalVariable( getModule(), Ty, false, llvm::GlobalValue::ExternalLinkage, nullptr, MangledName, nullptr, llvm::GlobalVariable::NotThreadLocal, getContext().getTargetAddressSpace(DAddrSpace)); // If we already created a global with the same mangled name (but different // type) before, take its name and remove it from its parent. 
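//===--- Illustrative aside (not part of the original source) -------------===//
// Hypothetical C++ in which the first reference creates a global of one IR
// type and the later definition needs another, so the declaration created by
// GetOrCreateLLVMGlobal() has to be renamed away, replaced, and erased as the
// surrounding code describes. Names are invented for the sketch.
extern int table[];                    // incomplete array type at first use
int first_entry() { return table[0]; } // creates the external declaration
int table[10] = {1, 2, 3};             // definition with the real type
//===----------------------------------------------------------------------===//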
if (Entry) { GV->takeName(Entry); if (!Entry->use_empty()) { Entry->replaceAllUsesWith(GV); } Entry->eraseFromParent(); } // This is the first use or definition of a mangled name. If there is a // deferred decl with this name, remember that we need to emit it at the end // of the file. auto DDI = DeferredDecls.find(MangledName); if (DDI != DeferredDecls.end()) { // Move the potentially referenced deferred decl to the DeferredDeclsToEmit // list, and remove it from DeferredDecls (since we don't need it anymore). addDeferredDeclToEmit(DDI->second); DeferredDecls.erase(DDI); } // Handle things which are present even on external declarations. if (D) { if (LangOpts.OpenMP && !LangOpts.OpenMPSimd) getOpenMPRuntime().registerTargetGlobalVariable(D, GV); // FIXME: This code is overly simple and should be merged with other global // handling. GV->setConstant(D->getType().isConstantStorage(getContext(), false, false)); GV->setAlignment(getContext().getDeclAlign(D).getAsAlign()); setLinkageForGV(GV, D); if (D->getTLSKind()) { if (D->getTLSKind() == VarDecl::TLS_Dynamic) CXXThreadLocals.push_back(D); setTLSMode(GV, *D); } setGVProperties(GV, D); // If required by the ABI, treat declarations of static data members with // inline initializers as definitions. if (getContext().isMSStaticDataMemberInlineDefinition(D)) { EmitGlobalVarDefinition(D); } // Emit section information for extern variables. if (D->hasExternalStorage()) { if (const SectionAttr *SA = D->getAttr()) GV->setSection(SA->getName()); } // Handle XCore specific ABI requirements. if (getTriple().getArch() == llvm::Triple::xcore && D->getLanguageLinkage() == CLanguageLinkage && D->getType().isConstant(Context) && isExternallyVisible(D->getLinkageAndVisibility().getLinkage())) GV->setSection(".cp.rodata"); // Handle code model attribute if (const auto *CMA = D->getAttr()) GV->setCodeModel(CMA->getModel()); // Check if we a have a const declaration with an initializer, we may be // able to emit it as available_externally to expose it's value to the // optimizer. if (Context.getLangOpts().CPlusPlus && GV->hasExternalLinkage() && D->getType().isConstQualified() && !GV->hasInitializer() && !D->hasDefinition() && D->hasInit() && !D->hasAttr()) { const auto *Record = Context.getBaseElementType(D->getType())->getAsCXXRecordDecl(); bool HasMutableFields = Record && Record->hasMutableFields(); if (!HasMutableFields) { const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); if (InitExpr) { ConstantEmitter emitter(*this); llvm::Constant *Init = emitter.tryEmitForInitializer(*InitDecl); if (Init) { auto *InitType = Init->getType(); if (GV->getValueType() != InitType) { // The type of the initializer does not match the definition. // This happens when an initializer has a different type from // the type of the global (because of padding at the end of a // structure for instance). GV->setName(StringRef()); // Make a new global with the correct type, this is now guaranteed // to work. auto *NewGV = cast( GetAddrOfGlobalVar(D, InitType, IsForDefinition) ->stripPointerCasts()); // Erase the old global, since it is no longer used. 
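//===--- Illustrative aside (not part of the original source) -------------===//
// Hypothetical C++ hitting the "const declaration with a visible initializer"
// case above: Max has an in-class initializer but no definition in this TU,
// yet its value can still be exposed to the optimizer by emitting the global
// as an available_externally constant. Names are invented for the sketch.
struct Limits {
  static const int Max = 128; // initializer visible, definition elsewhere
};
const int *max_ptr() { return &Limits::Max; } // odr-use references the global
//===----------------------------------------------------------------------===//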
GV->eraseFromParent(); GV = NewGV; } else { GV->setInitializer(Init); GV->setConstant(true); GV->setLinkage(llvm::GlobalValue::AvailableExternallyLinkage); } emitter.finalize(GV); } } } } } if (D && D->isThisDeclarationADefinition(Context) == VarDecl::DeclarationOnly) { getTargetCodeGenInfo().setTargetAttributes(D, GV, *this); // External HIP managed variables needed to be recorded for transformation // in both device and host compilations. if (getLangOpts().CUDA && D && D->hasAttr() && D->hasExternalStorage()) getCUDARuntime().handleVarRegistration(D, *GV); } if (D) SanitizerMD->reportGlobal(GV, *D); LangAS ExpectedAS = D ? D->getType().getAddressSpace() : (LangOpts.OpenCL ? LangAS::opencl_global : LangAS::Default); assert(getContext().getTargetAddressSpace(ExpectedAS) == TargetAS); if (DAddrSpace != ExpectedAS) { return getTargetCodeGenInfo().performAddrSpaceCast( *this, GV, DAddrSpace, ExpectedAS, llvm::PointerType::get(getLLVMContext(), TargetAS)); } return GV; } llvm::Constant * CodeGenModule::GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition) { const Decl *D = GD.getDecl(); if (isa(D) || isa(D)) return getAddrOfCXXStructor(GD, /*FnInfo=*/nullptr, /*FnType=*/nullptr, /*DontDefer=*/false, IsForDefinition); if (isa(D)) { auto FInfo = &getTypes().arrangeCXXMethodDeclaration(cast(D)); auto Ty = getTypes().GetFunctionType(*FInfo); return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, IsForDefinition); } if (isa(D)) { const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); return GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/false, IsForDefinition); } return GetAddrOfGlobalVar(cast(D), /*Ty=*/nullptr, IsForDefinition); } llvm::GlobalVariable *CodeGenModule::CreateOrReplaceCXXRuntimeVariable( StringRef Name, llvm::Type *Ty, llvm::GlobalValue::LinkageTypes Linkage, llvm::Align Alignment) { llvm::GlobalVariable *GV = getModule().getNamedGlobal(Name); llvm::GlobalVariable *OldGV = nullptr; if (GV) { // Check if the variable has the right type. if (GV->getValueType() == Ty) return GV; // Because C++ name mangling, the only way we can end up with an already // existing global with the same name is if it has been declared extern "C". assert(GV->isDeclaration() && "Declaration has wrong type!"); OldGV = GV; } // Create a new variable. GV = new llvm::GlobalVariable(getModule(), Ty, /*isConstant=*/true, Linkage, nullptr, Name); if (OldGV) { // Replace occurrences of the old variable if needed. GV->takeName(OldGV); if (!OldGV->use_empty()) { OldGV->replaceAllUsesWith(GV); } OldGV->eraseFromParent(); } if (supportsCOMDAT() && GV->isWeakForLinker() && !GV->hasAvailableExternallyLinkage()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); GV->setAlignment(Alignment); return GV; } /// GetAddrOfGlobalVar - Return the llvm::Constant for the address of the /// given global variable. If Ty is non-null and if the global doesn't exist, /// then it will be created with the specified type instead of whatever the /// normal requested type would be. If IsForDefinition is true, it is guaranteed /// that an actual global with type Ty will be returned, not conversion of a /// variable with the same mangled name but some other type. 
llvm::Constant *CodeGenModule::GetAddrOfGlobalVar(const VarDecl *D, llvm::Type *Ty, ForDefinition_t IsForDefinition) { assert(D->hasGlobalStorage() && "Not a global variable"); QualType ASTTy = D->getType(); if (!Ty) Ty = getTypes().ConvertTypeForMem(ASTTy); StringRef MangledName = getMangledName(D); return GetOrCreateLLVMGlobal(MangledName, Ty, ASTTy.getAddressSpace(), D, IsForDefinition); } /// CreateRuntimeVariable - Create a new runtime global variable with the /// specified type and name. llvm::Constant * CodeGenModule::CreateRuntimeVariable(llvm::Type *Ty, StringRef Name) { LangAS AddrSpace = getContext().getLangOpts().OpenCL ? LangAS::opencl_global : LangAS::Default; auto *Ret = GetOrCreateLLVMGlobal(Name, Ty, AddrSpace, nullptr); setDSOLocal(cast(Ret->stripPointerCasts())); return Ret; } void CodeGenModule::EmitTentativeDefinition(const VarDecl *D) { assert(!D->getInit() && "Cannot emit definite definitions here!"); StringRef MangledName = getMangledName(D); llvm::GlobalValue *GV = GetGlobalValue(MangledName); // We already have a definition, not declaration, with the same mangled name. // Emitting of declaration is not required (and actually overwrites emitted // definition). if (GV && !GV->isDeclaration()) return; // If we have not seen a reference to this variable yet, place it into the // deferred declarations table to be emitted if needed later. if (!MustBeEmitted(D) && !GV) { DeferredDecls[MangledName] = D; return; } // The tentative definition is the only definition. EmitGlobalVarDefinition(D); } void CodeGenModule::EmitExternalDeclaration(const DeclaratorDecl *D) { if (auto const *V = dyn_cast(D)) EmitExternalVarDeclaration(V); if (auto const *FD = dyn_cast(D)) EmitExternalFunctionDeclaration(FD); } CharUnits CodeGenModule::GetTargetTypeStoreSize(llvm::Type *Ty) const { return Context.toCharUnitsFromBits( getDataLayout().getTypeStoreSizeInBits(Ty)); } LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { if (LangOpts.OpenCL) { LangAS AS = D ? D->getType().getAddressSpace() : LangAS::opencl_global; assert(AS == LangAS::opencl_global || AS == LangAS::opencl_global_device || AS == LangAS::opencl_global_host || AS == LangAS::opencl_constant || AS == LangAS::opencl_local || AS >= LangAS::FirstTargetAddressSpace); return AS; } if (LangOpts.SYCLIsDevice && (!D || D->getType().getAddressSpace() == LangAS::Default)) return LangAS::sycl_global; if (LangOpts.CUDA && LangOpts.CUDAIsDevice) { if (D) { if (D->hasAttr()) return LangAS::cuda_constant; if (D->hasAttr()) return LangAS::cuda_shared; if (D->hasAttr()) return LangAS::cuda_device; if (D->getType().isConstQualified()) return LangAS::cuda_constant; } return LangAS::cuda_device; } if (LangOpts.OpenMP) { LangAS AS; if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS)) return AS; } return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } LangAS CodeGenModule::GetGlobalConstantAddressSpace() const { // OpenCL v1.2 s6.5.3: a string literal is in the constant address space. if (LangOpts.OpenCL) return LangAS::opencl_constant; if (LangOpts.SYCLIsDevice) return LangAS::sycl_global; if (LangOpts.HIP && LangOpts.CUDAIsDevice && getTriple().isSPIRV()) // For HIPSPV map literals to cuda_device (maps to CrossWorkGroup in SPIR-V) // instead of default AS (maps to Generic in SPIR-V). Otherwise, we end up // with OpVariable instructions with Generic storage class which is not // allowed (SPIR-V V1.6 s3.42.8). 
Also, mapping literals to SPIR-V // UniformConstant storage class is not viable as pointers to it may not be // casted to Generic pointers which are used to model HIP's "flat" pointers. return LangAS::cuda_device; if (auto AS = getTarget().getConstantAddressSpace()) return *AS; return LangAS::Default; } // In address space agnostic languages, string literals are in default address // space in AST. However, certain targets (e.g. amdgcn) request them to be // emitted in constant address space in LLVM IR. To be consistent with other // parts of AST, string literal global variables in constant address space // need to be casted to default address space before being put into address // map and referenced by other part of CodeGen. // In OpenCL, string literals are in constant address space in AST, therefore // they should not be casted to default address space. static llvm::Constant * castStringLiteralToDefaultAddressSpace(CodeGenModule &CGM, llvm::GlobalVariable *GV) { llvm::Constant *Cast = GV; if (!CGM.getLangOpts().OpenCL) { auto AS = CGM.GetGlobalConstantAddressSpace(); if (AS != LangAS::Default) Cast = CGM.getTargetCodeGenInfo().performAddrSpaceCast( CGM, GV, AS, LangAS::Default, llvm::PointerType::get( CGM.getLLVMContext(), CGM.getContext().getTargetAddressSpace(LangAS::Default))); } return Cast; } template void CodeGenModule::MaybeHandleStaticInExternC(const SomeDecl *D, llvm::GlobalValue *GV) { if (!getLangOpts().CPlusPlus) return; // Must have 'used' attribute, or else inline assembly can't rely on // the name existing. if (!D->template hasAttr()) return; // Must have internal linkage and an ordinary name. if (!D->getIdentifier() || D->getFormalLinkage() != Linkage::Internal) return; // Must be in an extern "C" context. Entities declared directly within // a record are not extern "C" even if the record is in such a context. const SomeDecl *First = D->getFirstDecl(); if (First->getDeclContext()->isRecord() || !First->isInExternCContext()) return; // OK, this is an internal linkage entity inside an extern "C" linkage // specification. Make a note of that so we can give it the "expected" // mangled name if nothing else is using that name. std::pair R = StaticExternCValues.insert(std::make_pair(D->getIdentifier(), GV)); // If we have multiple internal linkage entities with the same name // in extern "C" regions, none of them gets that name. if (!R.second) R.first->second = nullptr; } static bool shouldBeInCOMDAT(CodeGenModule &CGM, const Decl &D) { if (!CGM.supportsCOMDAT()) return false; if (D.hasAttr()) return true; GVALinkage Linkage; if (auto *VD = dyn_cast(&D)) Linkage = CGM.getContext().GetGVALinkageForVariable(VD); else Linkage = CGM.getContext().GetGVALinkageForFunction(cast(&D)); switch (Linkage) { case GVA_Internal: case GVA_AvailableExternally: case GVA_StrongExternal: return false; case GVA_DiscardableODR: case GVA_StrongODR: return true; } llvm_unreachable("No such linkage"); } bool CodeGenModule::supportsCOMDAT() const { return getTriple().supportsCOMDAT(); } void CodeGenModule::maybeSetTrivialComdat(const Decl &D, llvm::GlobalObject &GO) { if (!shouldBeInCOMDAT(*this, D)) return; GO.setComdat(TheModule.getOrInsertComdat(GO.getName())); } const ABIInfo &CodeGenModule::getABIInfo() { return getTargetCodeGenInfo().getABIInfo(); } /// Pass IsTentative as true if you want to create a tentative definition. 
void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, bool IsTentative) { // OpenCL global variables of sampler type are translated to function calls, // therefore no need to be translated. QualType ASTTy = D->getType(); if (getLangOpts().OpenCL && ASTTy->isSamplerT()) return; // If this is OpenMP device, check if it is legal to emit this global // normally. if (LangOpts.OpenMPIsTargetDevice && OpenMPRuntime && OpenMPRuntime->emitTargetGlobalVariable(D)) return; llvm::TrackingVH Init; bool NeedsGlobalCtor = false; // Whether the definition of the variable is available externally. // If yes, we shouldn't emit the GloablCtor and GlobalDtor for the variable // since this is the job for its original source. bool IsDefinitionAvailableExternally = getContext().GetGVALinkageForVariable(D) == GVA_AvailableExternally; bool NeedsGlobalDtor = !IsDefinitionAvailableExternally && D->needsDestruction(getContext()) == QualType::DK_cxx_destructor; // It is helpless to emit the definition for an available_externally variable // which can't be marked as const. // We don't need to check if it needs global ctor or dtor. See the above // comment for ideas. if (IsDefinitionAvailableExternally && (!D->hasConstantInitialization() || // TODO: Update this when we have interface to check constexpr // destructor. D->needsDestruction(getContext()) || !D->getType().isConstantStorage(getContext(), true, true))) return; const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); std::optional emitter; // CUDA E.2.4.1 "__shared__ variables cannot have an initialization // as part of their declaration." Sema has already checked for // error cases, so we just need to set Init to UndefValue. bool IsCUDASharedVar = getLangOpts().CUDAIsDevice && D->hasAttr(); // Shadows of initialized device-side global variables are also left // undefined. // Managed Variables should be initialized on both host side and device side. bool IsCUDAShadowVar = !getLangOpts().CUDAIsDevice && !D->hasAttr() && (D->hasAttr() || D->hasAttr() || D->hasAttr()); bool IsCUDADeviceShadowVar = getLangOpts().CUDAIsDevice && !D->hasAttr() && (D->getType()->isCUDADeviceBuiltinSurfaceType() || D->getType()->isCUDADeviceBuiltinTextureType()); if (getLangOpts().CUDA && (IsCUDASharedVar || IsCUDAShadowVar || IsCUDADeviceShadowVar)) Init = llvm::UndefValue::get(getTypes().ConvertTypeForMem(ASTTy)); else if (D->hasAttr()) Init = llvm::UndefValue::get(getTypes().ConvertTypeForMem(ASTTy)); else if (!InitExpr) { // This is a tentative definition; tentative definitions are // implicitly initialized with { 0 }. // // Note that tentative definitions are only emitted at the end of // a translation unit, so they should never have incomplete // type. In addition, EmitTentativeDefinition makes sure that we // never attempt to emit a tentative definition if a real one // exists. A use may still exists, however, so we still may need // to do a RAUW. 
assert(!ASTTy->isIncompleteType() && "Unexpected incomplete type"); Init = EmitNullConstant(D->getType()); } else { initializedGlobalDecl = GlobalDecl(D); emitter.emplace(*this); llvm::Constant *Initializer = emitter->tryEmitForInitializer(*InitDecl); if (!Initializer) { QualType T = InitExpr->getType(); if (D->getType()->isReferenceType()) T = D->getType(); if (getLangOpts().CPlusPlus) { if (InitDecl->hasFlexibleArrayInit(getContext())) ErrorUnsupported(D, "flexible array initializer"); Init = EmitNullConstant(T); if (!IsDefinitionAvailableExternally) NeedsGlobalCtor = true; } else { ErrorUnsupported(D, "static initializer"); Init = llvm::UndefValue::get(getTypes().ConvertType(T)); } } else { Init = Initializer; // We don't need an initializer, so remove the entry for the delayed // initializer position (just in case this entry was delayed) if we // also don't need to register a destructor. if (getLangOpts().CPlusPlus && !NeedsGlobalDtor) DelayedCXXInitPosition.erase(D); #ifndef NDEBUG CharUnits VarSize = getContext().getTypeSizeInChars(ASTTy) + InitDecl->getFlexibleArrayInitChars(getContext()); CharUnits CstSize = CharUnits::fromQuantity( getDataLayout().getTypeAllocSize(Init->getType())); assert(VarSize == CstSize && "Emitted constant has unexpected size"); #endif } } llvm::Type* InitType = Init->getType(); llvm::Constant *Entry = GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)); // Strip off pointer casts if we got them. Entry = Entry->stripPointerCasts(); // Entry is now either a Function or GlobalVariable. auto *GV = dyn_cast(Entry); // We have a definition after a declaration with the wrong type. // We must make a new GlobalVariable* and update everything that used OldGV // (a declaration or tentative definition) with the new GlobalVariable* // (which will be a definition). // // This happens if there is a prototype for a global (e.g. // "extern int x[];") and then a definition of a different type (e.g. // "int x[10];"). This also happens when an initializer has a different type // from the type of the global (this happens with unions). if (!GV || GV->getValueType() != InitType || GV->getType()->getAddressSpace() != getContext().getTargetAddressSpace(GetGlobalVarAddressSpace(D))) { // Move the old entry aside so that we'll create a new one. Entry->setName(StringRef()); // Make a new global with the correct type, this is now guaranteed to work. GV = cast( GetAddrOfGlobalVar(D, InitType, ForDefinition_t(!IsTentative)) ->stripPointerCasts()); // Replace all uses of the old global with the new global llvm::Constant *NewPtrForOldDecl = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Entry->getType()); Entry->replaceAllUsesWith(NewPtrForOldDecl); // Erase the old global, since it is no longer used. cast(Entry)->eraseFromParent(); } MaybeHandleStaticInExternC(D, GV); if (D->hasAttr()) AddGlobalAnnotations(D, GV); // Set the llvm linkage type as appropriate. llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(D); // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on // the device. [...]" // CUDA B.2.2 "The __constant__ qualifier, optionally used together with // __device__, declares a variable that: [...] // Is accessible from all the threads within the grid and from the host // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize() // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())." 
if (LangOpts.CUDA) { if (LangOpts.CUDAIsDevice) { if (Linkage != llvm::GlobalValue::InternalLinkage && (D->hasAttr() || D->hasAttr() || D->getType()->isCUDADeviceBuiltinSurfaceType() || D->getType()->isCUDADeviceBuiltinTextureType())) GV->setExternallyInitialized(true); } else { getCUDARuntime().internalizeDeviceSideVar(D, Linkage); } getCUDARuntime().handleVarRegistration(D, *GV); } GV->setInitializer(Init); if (emitter) emitter->finalize(GV); // If it is safe to mark the global 'constant', do so now. GV->setConstant(!NeedsGlobalCtor && !NeedsGlobalDtor && D->getType().isConstantStorage(getContext(), true, true)); // If it is in a read-only section, mark it 'constant'. if (const SectionAttr *SA = D->getAttr()) { const ASTContext::SectionInfo &SI = Context.SectionInfos[SA->getName()]; if ((SI.SectionFlags & ASTContext::PSF_Write) == 0) GV->setConstant(true); } CharUnits AlignVal = getContext().getDeclAlign(D); // Check for alignment specifed in an 'omp allocate' directive. if (std::optional AlignValFromAllocate = getOMPAllocateAlignment(D)) AlignVal = *AlignValFromAllocate; GV->setAlignment(AlignVal.getAsAlign()); // On Darwin, unlike other Itanium C++ ABI platforms, the thread-wrapper // function is only defined alongside the variable, not also alongside // callers. Normally, all accesses to a thread_local go through the // thread-wrapper in order to ensure initialization has occurred, underlying // variable will never be used other than the thread-wrapper, so it can be // converted to internal linkage. // // However, if the variable has the 'constinit' attribute, it _can_ be // referenced directly, without calling the thread-wrapper, so the linkage // must not be changed. // // Additionally, if the variable isn't plain external linkage, e.g. if it's // weak or linkonce, the de-duplication semantics are important to preserve, // so we don't change the linkage. if (D->getTLSKind() == VarDecl::TLS_Dynamic && Linkage == llvm::GlobalValue::ExternalLinkage && Context.getTargetInfo().getTriple().isOSDarwin() && !D->hasAttr()) Linkage = llvm::GlobalValue::InternalLinkage; GV->setLinkage(Linkage); if (D->hasAttr()) GV->setDLLStorageClass(llvm::GlobalVariable::DLLImportStorageClass); else if (D->hasAttr()) GV->setDLLStorageClass(llvm::GlobalVariable::DLLExportStorageClass); else GV->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass); if (Linkage == llvm::GlobalVariable::CommonLinkage) { // common vars aren't constant even if declared const. GV->setConstant(false); // Tentative definition of global variables may be initialized with // non-zero null pointers. In this case they should have weak linkage // since common linkage must have zero initializer and must not have // explicit section therefore cannot have non-zero initial value. if (!GV->getInitializer()->isNullValue()) GV->setLinkage(llvm::GlobalVariable::WeakAnyLinkage); } setNonAliasAttributes(D, GV); if (D->getTLSKind() && !GV->isThreadLocal()) { if (D->getTLSKind() == VarDecl::TLS_Dynamic) CXXThreadLocals.push_back(D); setTLSMode(GV, *D); } maybeSetTrivialComdat(*D, *GV); // Emit the initializer function if necessary. if (NeedsGlobalCtor || NeedsGlobalDtor) EmitCXXGlobalVarDeclInitFunc(D, GV, NeedsGlobalCtor); SanitizerMD->reportGlobal(GV, *D, NeedsGlobalCtor); // Emit global variable debug information. 
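//===--- Illustrative aside (not part of the original source) -------------===//
// Hypothetical C++20 globals showing the Darwin thread_local special case
// above: accesses to 'counter' go through the thread-wrapper, so on Darwin
// its definition can be internalized, while 'ready' is constinit, may be
// referenced directly, and therefore must keep external linkage. Names are
// invented for the sketch.
thread_local int counter = 0;
constinit thread_local int ready = 1;
//===----------------------------------------------------------------------===//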
  if (CGDebugInfo *DI = getModuleDebugInfo())
    if (getCodeGenOpts().hasReducedDebugInfo())
      DI->EmitGlobalVariable(GV, D);
}

void CodeGenModule::EmitExternalVarDeclaration(const VarDecl *D) {
  if (CGDebugInfo *DI = getModuleDebugInfo())
    if (getCodeGenOpts().hasReducedDebugInfo()) {
      QualType ASTTy = D->getType();
      llvm::Type *Ty = getTypes().ConvertTypeForMem(D->getType());
      llvm::Constant *GV =
          GetOrCreateLLVMGlobal(D->getName(), Ty, ASTTy.getAddressSpace(), D);
      DI->EmitExternalVariable(
          cast<llvm::GlobalVariable>(GV->stripPointerCasts()), D);
    }
}

void CodeGenModule::EmitExternalFunctionDeclaration(const FunctionDecl *FD) {
  if (CGDebugInfo *DI = getModuleDebugInfo())
    if (getCodeGenOpts().hasReducedDebugInfo()) {
      auto *Ty = getTypes().ConvertType(FD->getType());
      StringRef MangledName = getMangledName(FD);
      auto *Fn = dyn_cast<llvm::Function>(
          GetOrCreateLLVMFunction(MangledName, Ty, FD, /* ForVTable */ false));
      if (!Fn->getSubprogram())
        DI->EmitFunctionDecl(FD, FD->getLocation(), FD->getType(), Fn);
    }
}

static bool isVarDeclStrongDefinition(const ASTContext &Context,
                                      CodeGenModule &CGM, const VarDecl *D,
                                      bool NoCommon) {
  // Don't give variables common linkage if -fno-common was specified unless it
  // was overridden by a NoCommon attribute.
  if ((NoCommon || D->hasAttr<NoCommonAttr>()) && !D->hasAttr<CommonAttr>())
    return true;

  // C11 6.9.2/2:
  //   A declaration of an identifier for an object that has file scope without
  //   an initializer, and without a storage-class specifier or with the
  //   storage-class specifier static, constitutes a tentative definition.
  if (D->getInit() || D->hasExternalStorage())
    return true;

  // A variable cannot be both common and exist in a section.
  if (D->hasAttr<SectionAttr>())
    return true;

  // A variable cannot be both common and exist in a section.
  // We don't try to determine which is the right section in the front-end.
  // If no specialized section name is applicable, it will resort to default.
  if (D->hasAttr<PragmaClangBSSSectionAttr>() ||
      D->hasAttr<PragmaClangDataSectionAttr>() ||
      D->hasAttr<PragmaClangRelroSectionAttr>() ||
      D->hasAttr<PragmaClangRodataSectionAttr>())
    return true;

  // Thread local vars aren't considered common linkage.
  if (D->getTLSKind())
    return true;

  // Tentative definitions marked with WeakImportAttr are true definitions.
  if (D->hasAttr<WeakImportAttr>())
    return true;

  // A variable cannot be both common and exist in a comdat.
  if (shouldBeInCOMDAT(CGM, *D))
    return true;

  // Declarations with a required alignment do not have common linkage in MSVC
  // mode.
  if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
    if (D->hasAttr<AlignedAttr>())
      return true;
    QualType VarType = D->getType();
    if (Context.isAlignmentRequired(VarType))
      return true;

    if (const auto *RT = VarType->getAs<RecordType>()) {
      const RecordDecl *RD = RT->getDecl();
      for (const FieldDecl *FD : RD->fields()) {
        if (FD->isBitField())
          continue;
        if (FD->hasAttr<AlignedAttr>())
          return true;
        if (Context.isAlignmentRequired(FD->getType()))
          return true;
      }
    }
  }

  // Microsoft's link.exe doesn't support alignments greater than 32 bytes for
  // common symbols, so symbols with greater alignment requirements cannot be
  // common.
  // Other COFF linkers (ld.bfd and LLD) support arbitrary power-of-two
  // alignments for common symbols via the aligncomm directive, so this
  // restriction only applies to MSVC environments.
if (Context.getTargetInfo().getTriple().isKnownWindowsMSVCEnvironment() && Context.getTypeAlignIfKnown(D->getType()) > Context.toBits(CharUnits::fromQuantity(32))) return true; return false; } llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(const DeclaratorDecl *D, GVALinkage Linkage) { if (Linkage == GVA_Internal) return llvm::Function::InternalLinkage; if (D->hasAttr()) return llvm::GlobalVariable::WeakAnyLinkage; if (const auto *FD = D->getAsFunction()) if (FD->isMultiVersion() && Linkage == GVA_AvailableExternally) return llvm::GlobalVariable::LinkOnceAnyLinkage; // We are guaranteed to have a strong definition somewhere else, // so we can use available_externally linkage. if (Linkage == GVA_AvailableExternally) return llvm::GlobalValue::AvailableExternallyLinkage; // Note that Apple's kernel linker doesn't support symbol // coalescing, so we need to avoid linkonce and weak linkages there. // Normally, this means we just map to internal, but for explicit // instantiations we'll map to external. // In C++, the compiler has to emit a definition in every translation unit // that references the function. We should use linkonce_odr because // a) if all references in this translation unit are optimized away, we // don't need to codegen it. b) if the function persists, it needs to be // merged with other definitions. c) C++ has the ODR, so we know the // definition is dependable. if (Linkage == GVA_DiscardableODR) return !Context.getLangOpts().AppleKext ? llvm::Function::LinkOnceODRLinkage : llvm::Function::InternalLinkage; // An explicit instantiation of a template has weak linkage, since // explicit instantiations can occur in multiple translation units // and must all be equivalent. However, we are not allowed to // throw away these explicit instantiations. // // CUDA/HIP: For -fno-gpu-rdc case, device code is limited to one TU, // so say that CUDA templates are either external (for kernels) or internal. // This lets llvm perform aggressive inter-procedural optimizations. For // -fgpu-rdc case, device function calls across multiple TU's are allowed, // therefore we need to follow the normal linkage paradigm. if (Linkage == GVA_StrongODR) { if (getLangOpts().AppleKext) return llvm::Function::ExternalLinkage; if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && !getLangOpts().GPURelocatableDeviceCode) return D->hasAttr() ? llvm::Function::ExternalLinkage : llvm::Function::InternalLinkage; return llvm::Function::WeakODRLinkage; } // C++ doesn't have tentative definitions and thus cannot have common // linkage. if (!getLangOpts().CPlusPlus && isa(D) && !isVarDeclStrongDefinition(Context, *this, cast(D), CodeGenOpts.NoCommon)) return llvm::GlobalVariable::CommonLinkage; // selectany symbols are externally visible, so use weak instead of // linkonce. MSVC optimizes away references to const selectany globals, so // all definitions should be the same and ODR linkage should be used. // http://msdn.microsoft.com/en-us/library/5tkz6s71.aspx if (D->hasAttr()) return llvm::GlobalVariable::WeakODRLinkage; // Otherwise, we have strong external linkage. assert(Linkage == GVA_StrongExternal); return llvm::GlobalVariable::ExternalLinkage; } llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageVarDefinition(const VarDecl *VD) { GVALinkage Linkage = getContext().GetGVALinkageForVariable(VD); return getLLVMLinkageForDeclarator(VD, Linkage); } /// Replace the uses of a function that was declared with a non-proto type. 
/// We want to silently drop extra arguments from call sites static void replaceUsesOfNonProtoConstant(llvm::Constant *old, llvm::Function *newFn) { // Fast path. if (old->use_empty()) return; llvm::Type *newRetTy = newFn->getReturnType(); SmallVector newArgs; SmallVector callSitesToBeRemovedFromParent; for (llvm::Value::use_iterator ui = old->use_begin(), ue = old->use_end(); ui != ue; ui++) { llvm::User *user = ui->getUser(); // Recognize and replace uses of bitcasts. Most calls to // unprototyped functions will use bitcasts. if (auto *bitcast = dyn_cast(user)) { if (bitcast->getOpcode() == llvm::Instruction::BitCast) replaceUsesOfNonProtoConstant(bitcast, newFn); continue; } // Recognize calls to the function. llvm::CallBase *callSite = dyn_cast(user); if (!callSite) continue; if (!callSite->isCallee(&*ui)) continue; // If the return types don't match exactly, then we can't // transform this call unless it's dead. if (callSite->getType() != newRetTy && !callSite->use_empty()) continue; // Get the call site's attribute list. SmallVector newArgAttrs; llvm::AttributeList oldAttrs = callSite->getAttributes(); // If the function was passed too few arguments, don't transform. unsigned newNumArgs = newFn->arg_size(); if (callSite->arg_size() < newNumArgs) continue; // If extra arguments were passed, we silently drop them. // If any of the types mismatch, we don't transform. unsigned argNo = 0; bool dontTransform = false; for (llvm::Argument &A : newFn->args()) { if (callSite->getArgOperand(argNo)->getType() != A.getType()) { dontTransform = true; break; } // Add any parameter attributes. newArgAttrs.push_back(oldAttrs.getParamAttrs(argNo)); argNo++; } if (dontTransform) continue; // Okay, we can transform this. Create the new call instruction and copy // over the required information. newArgs.append(callSite->arg_begin(), callSite->arg_begin() + argNo); // Copy over any operand bundles. SmallVector newBundles; callSite->getOperandBundlesAsDefs(newBundles); llvm::CallBase *newCall; if (isa(callSite)) { newCall = llvm::CallInst::Create(newFn, newArgs, newBundles, "", callSite); } else { auto *oldInvoke = cast(callSite); newCall = llvm::InvokeInst::Create(newFn, oldInvoke->getNormalDest(), oldInvoke->getUnwindDest(), newArgs, newBundles, "", callSite); } newArgs.clear(); // for the next iteration if (!newCall->getType()->isVoidTy()) newCall->takeName(callSite); newCall->setAttributes( llvm::AttributeList::get(newFn->getContext(), oldAttrs.getFnAttrs(), oldAttrs.getRetAttrs(), newArgAttrs)); newCall->setCallingConv(callSite->getCallingConv()); // Finally, remove the old call, replacing any uses with the new one. if (!callSite->use_empty()) callSite->replaceAllUsesWith(newCall); // Copy debug location attached to CI. if (callSite->getDebugLoc()) newCall->setDebugLoc(callSite->getDebugLoc()); callSitesToBeRemovedFromParent.push_back(callSite); } for (auto *callSite : callSitesToBeRemovedFromParent) { callSite->eraseFromParent(); } } /// ReplaceUsesOfNonProtoTypeWithRealFunction - This function is called when we /// implement a function with no prototype, e.g. "int foo() {}". If there are /// existing call uses of the old function in the module, this adjusts them to /// call the new function directly. /// /// This is not just a cleanup: the always_inline pass requires direct calls to /// functions to be able to inline them. If there is a bitcast in the way, it /// won't inline them. Instcombine normally deletes these calls, but it isn't /// run at -O0. 
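// Illustrative sketch (hypothetical C++ declarations, typical ELF target): the
// GVA_* categories mapped by getLLVMLinkageForDeclarator above and the LLVM
// linkage each symbol is expected to receive if it is emitted.
static int internal_fn() { return 0; }  // GVA_Internal       -> internal
inline int inline_fn() { return 1; }    // GVA_DiscardableODR -> linkonce_odr
template <class T> int tpl(T) { return 2; }
template int tpl<int>(int);             // GVA_StrongODR      -> weak_odr
int strong_fn() { return 3; }           // GVA_StrongExternal -> external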
static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old, llvm::Function *NewFn) { // If we're redefining a global as a function, don't transform it. if (!isa(Old)) return; replaceUsesOfNonProtoConstant(Old, NewFn); } void CodeGenModule::HandleCXXStaticMemberVarInstantiation(VarDecl *VD) { auto DK = VD->isThisDeclarationADefinition(); if ((DK == VarDecl::Definition && VD->hasAttr()) || (LangOpts.CUDA && !shouldEmitCUDAGlobalVar(VD))) return; TemplateSpecializationKind TSK = VD->getTemplateSpecializationKind(); // If we have a definition, this might be a deferred decl. If the // instantiation is explicit, make sure we emit it at the end. if (VD->getDefinition() && TSK == TSK_ExplicitInstantiationDefinition) GetAddrOfGlobalVar(VD); EmitTopLevelDecl(VD); } void CodeGenModule::EmitGlobalFunctionDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast(GD.getDecl()); // Compute the function info and LLVM type. const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); // Get or create the prototype for the function. if (!GV || (GV->getValueType() != Ty)) GV = cast(GetAddrOfFunction(GD, Ty, /*ForVTable=*/false, /*DontDefer=*/true, ForDefinition)); // Already emitted. if (!GV->isDeclaration()) return; // We need to set linkage and visibility on the function before // generating code for it because various parts of IR generation // want to propagate this information down (e.g. to local static // declarations). auto *Fn = cast(GV); setFunctionLinkage(GD, Fn); // FIXME: this is redundant with part of setFunctionDefinitionAttributes setGVProperties(Fn, GD); MaybeHandleStaticInExternC(D, Fn); maybeSetTrivialComdat(*D, *Fn); CodeGenFunction(*this).GenerateCode(GD, Fn, FI); setNonAliasAttributes(GD, Fn); SetLLVMFunctionAttributesForDefinition(D, Fn); if (const ConstructorAttr *CA = D->getAttr()) AddGlobalCtor(Fn, CA->getPriority()); if (const DestructorAttr *DA = D->getAttr()) AddGlobalDtor(Fn, DA->getPriority(), true); if (getLangOpts().OpenMP && D->hasAttr()) getOpenMPRuntime().emitDeclareTargetFunction(D, GV); } void CodeGenModule::EmitAliasDefinition(GlobalDecl GD) { const auto *D = cast(GD.getDecl()); const AliasAttr *AA = D->getAttr(); assert(AA && "Not an alias?"); StringRef MangledName = getMangledName(GD); if (AA->getAliasee() == MangledName) { Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0; return; } // If there is a definition in the module, then it wins over the alias. // This is dubious, but allow it to be safe. Just ignore the alias. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry && !Entry->isDeclaration()) return; Aliases.push_back(GD); llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType()); // Create a reference to the named value. This ensures that it is emitted // if a deferred decl. llvm::Constant *Aliasee; llvm::GlobalValue::LinkageTypes LT; if (isa(DeclTy)) { Aliasee = GetOrCreateLLVMFunction(AA->getAliasee(), DeclTy, GD, /*ForVTable=*/false); LT = getFunctionLinkage(GD); } else { Aliasee = GetOrCreateLLVMGlobal(AA->getAliasee(), DeclTy, LangAS::Default, /*D=*/nullptr); if (const auto *VD = dyn_cast(GD.getDecl())) LT = getLLVMLinkageVarDefinition(VD); else LT = getFunctionLinkage(GD); } // Create the new alias itself, but don't set a name yet. 
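// Illustrative sketch (hypothetical TU, assumes an ELF or other alias-capable
// target): the "extern declaration followed by an alias" pattern handled by
// the takeName/replaceAllUsesWith logic below. extern "C" keeps both names
// unmangled so the alias target can be spelled directly.
extern "C" int real_impl(void) { return 42; }
extern "C" int api_entry(void);  // plain declaration seen first
extern "C" int api_entry(void) __attribute__((alias("real_impl")));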
unsigned AS = Aliasee->getType()->getPointerAddressSpace(); auto *GA = llvm::GlobalAlias::create(DeclTy, AS, LT, "", Aliasee, &getModule()); if (Entry) { if (GA->getAliasee() == Entry) { Diags.Report(AA->getLocation(), diag::err_cyclic_alias) << 0; return; } assert(Entry->isDeclaration()); // If there is a declaration in the module, then we had an extern followed // by the alias, as in: // extern int test6(); // ... // int test6() __attribute__((alias("test7"))); // // Remove it and replace uses of it with the alias. GA->takeName(Entry); Entry->replaceAllUsesWith(GA); Entry->eraseFromParent(); } else { GA->setName(MangledName); } // Set attributes which are particular to an alias; this is a // specialization of the attributes which may be set on a global // variable/function. if (D->hasAttr() || D->hasAttr() || D->isWeakImported()) { GA->setLinkage(llvm::Function::WeakAnyLinkage); } if (const auto *VD = dyn_cast(D)) if (VD->getTLSKind()) setTLSMode(GA, *VD); SetCommonAttributes(GD, GA); // Emit global alias debug information. if (isa(D)) if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitGlobalAlias(cast(GA->getAliasee()->stripPointerCasts()), GD); } void CodeGenModule::emitIFuncDefinition(GlobalDecl GD) { const auto *D = cast(GD.getDecl()); const IFuncAttr *IFA = D->getAttr(); assert(IFA && "Not an ifunc?"); StringRef MangledName = getMangledName(GD); if (IFA->getResolver() == MangledName) { Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1; return; } // Report an error if some definition overrides ifunc. llvm::GlobalValue *Entry = GetGlobalValue(MangledName); if (Entry && !Entry->isDeclaration()) { GlobalDecl OtherGD; if (lookupRepresentativeDecl(MangledName, OtherGD) && DiagnosedConflictingDefinitions.insert(GD).second) { Diags.Report(D->getLocation(), diag::err_duplicate_mangled_name) << MangledName; Diags.Report(OtherGD.getDecl()->getLocation(), diag::note_previous_definition); } return; } Aliases.push_back(GD); // The resolver might not be visited yet. Specify a dummy non-function type to // indicate IsIncompleteFunction. Either the type is ignored (if the resolver // was emitted) or the whole function will be replaced (if the resolver has // not been emitted). llvm::Constant *Resolver = GetOrCreateLLVMFunction(IFA->getResolver(), VoidTy, {}, /*ForVTable=*/false); llvm::Type *DeclTy = getTypes().ConvertTypeForMem(D->getType()); llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create(DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); if (Entry) { if (GIF->getResolver() == Entry) { Diags.Report(IFA->getLocation(), diag::err_cyclic_alias) << 1; return; } assert(Entry->isDeclaration()); // If there is a declaration in the module, then we had an extern followed // by the ifunc, as in: // extern int test(); // ... // int test() __attribute__((ifunc("resolver"))); // // Remove it and replace uses of it with the ifunc. GIF->takeName(Entry); Entry->replaceAllUsesWith(GIF); Entry->eraseFromParent(); } else GIF->setName(MangledName); SetCommonAttributes(GD, GIF); } llvm::Function *CodeGenModule::getIntrinsic(unsigned IID, ArrayRef Tys) { return llvm::Intrinsic::getDeclaration(&getModule(), (llvm::Intrinsic::ID)IID, Tys); } static llvm::StringMapEntry & GetConstantCFStringEntry(llvm::StringMap &Map, const StringLiteral *Literal, bool TargetIsLSB, bool &IsUTF16, unsigned &StringLength) { StringRef String = Literal->getString(); unsigned NumBytes = String.size(); // Check for simple case. 
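// Illustrative sketch (hypothetical TU, assumes an ELF target with ifunc
// support, e.g. Linux/glibc): the resolver pattern emitted by
// emitIFuncDefinition above. The resolver runs at load time and returns the
// implementation the dynamic linker should bind to the symbol.
extern "C" int impl_generic(void) { return 1; }
extern "C" int impl_fast(void) { return 2; }
extern "C" int (*resolve_dispatch(void))(void) { return &impl_fast; }
extern "C" int dispatch(void) __attribute__((ifunc("resolve_dispatch")));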
if (!Literal->containsNonAsciiOrNull()) { StringLength = NumBytes; return *Map.insert(std::make_pair(String, nullptr)).first; } // Otherwise, convert the UTF8 literals into a string of shorts. IsUTF16 = true; SmallVector ToBuf(NumBytes + 1); // +1 for ending nulls. const llvm::UTF8 *FromPtr = (const llvm::UTF8 *)String.data(); llvm::UTF16 *ToPtr = &ToBuf[0]; (void)llvm::ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes, &ToPtr, ToPtr + NumBytes, llvm::strictConversion); // ConvertUTF8toUTF16 returns the length in ToPtr. StringLength = ToPtr - &ToBuf[0]; // Add an explicit null. *ToPtr = 0; return *Map.insert(std::make_pair( StringRef(reinterpret_cast(ToBuf.data()), (StringLength + 1) * 2), nullptr)).first; } ConstantAddress CodeGenModule::GetAddrOfConstantCFString(const StringLiteral *Literal) { unsigned StringLength = 0; bool isUTF16 = false; llvm::StringMapEntry &Entry = GetConstantCFStringEntry(CFConstantStringMap, Literal, getDataLayout().isLittleEndian(), isUTF16, StringLength); if (auto *C = Entry.second) return ConstantAddress( C, C->getValueType(), CharUnits::fromQuantity(C->getAlignment())); const ASTContext &Context = getContext(); const llvm::Triple &Triple = getTriple(); const auto CFRuntime = getLangOpts().CFRuntime; const bool IsSwiftABI = static_cast(CFRuntime) >= static_cast(LangOptions::CoreFoundationABI::Swift); const bool IsSwift4_1 = CFRuntime == LangOptions::CoreFoundationABI::Swift4_1; // If we don't already have it, get __CFConstantStringClassReference. if (!CFConstantStringClassRef) { const char *CFConstantStringClassName = "__CFConstantStringClassReference"; llvm::Type *Ty = getTypes().ConvertType(getContext().IntTy); Ty = llvm::ArrayType::get(Ty, 0); switch (CFRuntime) { default: break; case LangOptions::CoreFoundationABI::Swift: [[fallthrough]]; case LangOptions::CoreFoundationABI::Swift5_0: CFConstantStringClassName = Triple.isOSDarwin() ? "$s15SwiftFoundation19_NSCFConstantStringCN" : "$s10Foundation19_NSCFConstantStringCN"; Ty = IntPtrTy; break; case LangOptions::CoreFoundationABI::Swift4_2: CFConstantStringClassName = Triple.isOSDarwin() ? "$S15SwiftFoundation19_NSCFConstantStringCN" : "$S10Foundation19_NSCFConstantStringCN"; Ty = IntPtrTy; break; case LangOptions::CoreFoundationABI::Swift4_1: CFConstantStringClassName = Triple.isOSDarwin() ? "__T015SwiftFoundation19_NSCFConstantStringCN" : "__T010Foundation19_NSCFConstantStringCN"; Ty = IntPtrTy; break; } llvm::Constant *C = CreateRuntimeVariable(Ty, CFConstantStringClassName); if (Triple.isOSBinFormatELF() || Triple.isOSBinFormatCOFF()) { llvm::GlobalValue *GV = nullptr; if ((GV = dyn_cast(C))) { IdentifierInfo &II = Context.Idents.get(GV->getName()); TranslationUnitDecl *TUDecl = Context.getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); const VarDecl *VD = nullptr; for (const auto *Result : DC->lookup(&II)) if ((VD = dyn_cast(Result))) break; if (Triple.isOSBinFormatELF()) { if (!VD) GV->setLinkage(llvm::GlobalValue::ExternalLinkage); } else { GV->setLinkage(llvm::GlobalValue::ExternalLinkage); if (!VD || !VD->hasAttr()) GV->setDLLStorageClass(llvm::GlobalValue::DLLImportStorageClass); else GV->setDLLStorageClass(llvm::GlobalValue::DLLExportStorageClass); } setDSOLocal(GV); } } // Decay array -> ptr CFConstantStringClassRef = IsSwiftABI ? 
llvm::ConstantExpr::getPtrToInt(C, Ty) : C; } QualType CFTy = Context.getCFConstantStringType(); auto *STy = cast(getTypes().ConvertType(CFTy)); ConstantInitBuilder Builder(*this); auto Fields = Builder.beginStruct(STy); // Class pointer. Fields.add(cast(CFConstantStringClassRef)); // Flags. if (IsSwiftABI) { Fields.addInt(IntPtrTy, IsSwift4_1 ? 0x05 : 0x01); Fields.addInt(Int64Ty, isUTF16 ? 0x07d0 : 0x07c8); } else { Fields.addInt(IntTy, isUTF16 ? 0x07d0 : 0x07C8); } // String pointer. llvm::Constant *C = nullptr; if (isUTF16) { auto Arr = llvm::ArrayRef( reinterpret_cast(const_cast(Entry.first().data())), Entry.first().size() / 2); C = llvm::ConstantDataArray::get(VMContext, Arr); } else { C = llvm::ConstantDataArray::getString(VMContext, Entry.first()); } // Note: -fwritable-strings doesn't make the backing store strings of // CFStrings writable. auto *GV = new llvm::GlobalVariable(getModule(), C->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, C, ".str"); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); // Don't enforce the target's minimum global alignment, since the only use // of the string is via this class initializer. CharUnits Align = isUTF16 ? Context.getTypeAlignInChars(Context.ShortTy) : Context.getTypeAlignInChars(Context.CharTy); GV->setAlignment(Align.getAsAlign()); // FIXME: We set the section explicitly to avoid a bug in ld64 224.1. // Without it LLVM can merge the string with a non unnamed_addr one during // LTO. Doing that changes the section it ends in, which surprises ld64. if (Triple.isOSBinFormatMachO()) GV->setSection(isUTF16 ? "__TEXT,__ustring" : "__TEXT,__cstring,cstring_literals"); // Make sure the literal ends up in .rodata to allow for safe ICF and for // the static linker to adjust permissions to read-only later on. else if (Triple.isOSBinFormatELF()) GV->setSection(".rodata"); // String. Fields.add(GV); // String length. llvm::IntegerType *LengthTy = llvm::IntegerType::get(getModule().getContext(), Context.getTargetInfo().getLongWidth()); if (IsSwiftABI) { if (CFRuntime == LangOptions::CoreFoundationABI::Swift4_1 || CFRuntime == LangOptions::CoreFoundationABI::Swift4_2) LengthTy = Int32Ty; else LengthTy = IntPtrTy; } Fields.addInt(LengthTy, StringLength); // Swift ABI requires 8-byte alignment to ensure that the _Atomic(uint64_t) is // properly aligned on 32-bit platforms. CharUnits Alignment = IsSwiftABI ? Context.toCharUnitsFromBits(64) : getPointerAlign(); // The struct. 
GV = Fields.finishAndCreateGlobal("_unnamed_cfstring_", Alignment, /*isConstant=*/false, llvm::GlobalVariable::PrivateLinkage); GV->addAttribute("objc_arc_inert"); switch (Triple.getObjectFormat()) { case llvm::Triple::UnknownObjectFormat: llvm_unreachable("unknown file format"); case llvm::Triple::DXContainer: case llvm::Triple::GOFF: case llvm::Triple::SPIRV: case llvm::Triple::XCOFF: llvm_unreachable("unimplemented"); case llvm::Triple::COFF: case llvm::Triple::ELF: case llvm::Triple::Wasm: GV->setSection("cfstring"); break; case llvm::Triple::MachO: GV->setSection("__DATA,__cfstring"); break; } Entry.second = GV; return ConstantAddress(GV, GV->getValueType(), Alignment); } bool CodeGenModule::getExpressionLocationsEnabled() const { return !CodeGenOpts.EmitCodeView || CodeGenOpts.DebugColumnInfo; } QualType CodeGenModule::getObjCFastEnumerationStateType() { if (ObjCFastEnumerationStateType.isNull()) { RecordDecl *D = Context.buildImplicitRecord("__objcFastEnumerationState"); D->startDefinition(); QualType FieldTypes[] = { Context.UnsignedLongTy, Context.getPointerType(Context.getObjCIdType()), Context.getPointerType(Context.UnsignedLongTy), Context.getConstantArrayType(Context.UnsignedLongTy, llvm::APInt(32, 5), nullptr, ArraySizeModifier::Normal, 0)}; for (size_t i = 0; i < 4; ++i) { FieldDecl *Field = FieldDecl::Create(Context, D, SourceLocation(), SourceLocation(), nullptr, FieldTypes[i], /*TInfo=*/nullptr, /*BitWidth=*/nullptr, /*Mutable=*/false, ICIS_NoInit); Field->setAccess(AS_public); D->addDecl(Field); } D->completeDefinition(); ObjCFastEnumerationStateType = Context.getTagDeclType(D); } return ObjCFastEnumerationStateType; } llvm::Constant * CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) { assert(!E->getType()->isPointerType() && "Strings are always arrays"); // Don't emit it as the address of the string, emit the string data itself // as an inline array. if (E->getCharByteWidth() == 1) { SmallString<64> Str(E->getString()); // Resize the string to the right size, which is indicated by its type. const ConstantArrayType *CAT = Context.getAsConstantArrayType(E->getType()); assert(CAT && "String literal not of constant array type!"); Str.resize(CAT->getZExtSize()); return llvm::ConstantDataArray::getString(VMContext, Str, false); } auto *AType = cast(getTypes().ConvertType(E->getType())); llvm::Type *ElemTy = AType->getElementType(); unsigned NumElements = AType->getNumElements(); // Wide strings have either 2-byte or 4-byte elements. 
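// Illustrative sketch (hypothetical, non-Swift CF runtime): the shape of the
// "_unnamed_cfstring_" object built by the Fields.add/addInt calls above, one
// member per field in the order they are added.
struct ConstantCFStringLayout {
  const void *isa;     // __CFConstantStringClassReference
  int flags;           // 0x07C8 for an ASCII payload, 0x07D0 for UTF-16
  const void *payload; // the ".str" byte or UTF-16 array emitted above
  long length;         // StringLength, in code units
};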
if (ElemTy->getPrimitiveSizeInBits() == 16) { SmallVector Elements; Elements.reserve(NumElements); for(unsigned i = 0, e = E->getLength(); i != e; ++i) Elements.push_back(E->getCodeUnit(i)); Elements.resize(NumElements); return llvm::ConstantDataArray::get(VMContext, Elements); } assert(ElemTy->getPrimitiveSizeInBits() == 32); SmallVector Elements; Elements.reserve(NumElements); for(unsigned i = 0, e = E->getLength(); i != e; ++i) Elements.push_back(E->getCodeUnit(i)); Elements.resize(NumElements); return llvm::ConstantDataArray::get(VMContext, Elements); } static llvm::GlobalVariable * GenerateStringLiteral(llvm::Constant *C, llvm::GlobalValue::LinkageTypes LT, CodeGenModule &CGM, StringRef GlobalName, CharUnits Alignment) { unsigned AddrSpace = CGM.getContext().getTargetAddressSpace( CGM.GetGlobalConstantAddressSpace()); llvm::Module &M = CGM.getModule(); // Create a global variable for this string auto *GV = new llvm::GlobalVariable( M, C->getType(), !CGM.getLangOpts().WritableStrings, LT, C, GlobalName, nullptr, llvm::GlobalVariable::NotThreadLocal, AddrSpace); GV->setAlignment(Alignment.getAsAlign()); GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); if (GV->isWeakForLinker()) { assert(CGM.supportsCOMDAT() && "Only COFF uses weak string literals"); GV->setComdat(M.getOrInsertComdat(GV->getName())); } CGM.setDSOLocal(GV); return GV; } /// GetAddrOfConstantStringFromLiteral - Return a pointer to a /// constant array for the given string literal. ConstantAddress CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S, StringRef Name) { CharUnits Alignment = getContext().getAlignOfGlobalVarInChars(S->getType(), /*VD=*/nullptr); llvm::Constant *C = GetConstantArrayFromStringLiteral(S); llvm::GlobalVariable **Entry = nullptr; if (!LangOpts.WritableStrings) { Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { if (uint64_t(Alignment.getQuantity()) > GV->getAlignment()) GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), GV->getValueType(), Alignment); } } SmallString<256> MangledNameBuffer; StringRef GlobalVariableName; llvm::GlobalValue::LinkageTypes LT; // Mangle the string literal if that's how the ABI merges duplicate strings. // Don't do it if they are writable, since we don't want writes in one TU to // affect strings in another. if (getCXXABI().getMangleContext().shouldMangleStringLiteral(S) && !LangOpts.WritableStrings) { llvm::raw_svector_ostream Out(MangledNameBuffer); getCXXABI().getMangleContext().mangleStringLiteral(S, Out); LT = llvm::GlobalValue::LinkOnceODRLinkage; GlobalVariableName = MangledNameBuffer; } else { LT = llvm::GlobalValue::PrivateLinkage; GlobalVariableName = Name; } auto GV = GenerateStringLiteral(C, LT, *this, GlobalVariableName, Alignment); CGDebugInfo *DI = getModuleDebugInfo(); if (DI && getCodeGenOpts().hasReducedDebugInfo()) DI->AddStringLiteralDebugInfo(GV, S); if (Entry) *Entry = GV; SanitizerMD->reportGlobal(GV, S->getStrTokenLoc(0), ""); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), GV->getValueType(), Alignment); } /// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant /// array for the given ObjCEncodeExpr node. 
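// Illustrative sketch (hypothetical literals): the element widths that select
// the branches of GetConstantArrayFromStringLiteral above, plus the naming
// policy of GetAddrOfConstantStringFromLiteral.
const char     *narrow = "abc";  // 1-byte units, emitted directly as bytes
const char16_t *utf16  = u"abc"; // 2-byte units, 16-bit element array
const char32_t *utf32  = U"abc"; // 4-byte units, 32-bit element array
// Under ABIs that mangle string literals (e.g. the Microsoft C++ ABI) the
// backing global gets linkonce_odr linkage so identical literals deduplicate
// across TUs; otherwise it is a private ".str" global.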
ConstantAddress CodeGenModule::GetAddrOfConstantStringFromObjCEncode(const ObjCEncodeExpr *E) { std::string Str; getContext().getObjCEncodingForType(E->getEncodedType(), Str); return GetAddrOfConstantCString(Str); } /// GetAddrOfConstantCString - Returns a pointer to a character array containing /// the literal and a terminating '\0' character. /// The result has pointer to array type. ConstantAddress CodeGenModule::GetAddrOfConstantCString( const std::string &Str, const char *GlobalName) { StringRef StrWithNull(Str.c_str(), Str.size() + 1); CharUnits Alignment = getContext().getAlignOfGlobalVarInChars( getContext().CharTy, /*VD=*/nullptr); llvm::Constant *C = llvm::ConstantDataArray::getString(getLLVMContext(), StrWithNull, false); // Don't share any string literals if strings aren't constant. llvm::GlobalVariable **Entry = nullptr; if (!LangOpts.WritableStrings) { Entry = &ConstantStringMap[C]; if (auto GV = *Entry) { if (uint64_t(Alignment.getQuantity()) > GV->getAlignment()) GV->setAlignment(Alignment.getAsAlign()); return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), GV->getValueType(), Alignment); } } // Get the default prefix if a name wasn't specified. if (!GlobalName) GlobalName = ".str"; // Create a global variable for this. auto GV = GenerateStringLiteral(C, llvm::GlobalValue::PrivateLinkage, *this, GlobalName, Alignment); if (Entry) *Entry = GV; return ConstantAddress(castStringLiteralToDefaultAddressSpace(*this, GV), GV->getValueType(), Alignment); } ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( const MaterializeTemporaryExpr *E, const Expr *Init) { assert((E->getStorageDuration() == SD_Static || E->getStorageDuration() == SD_Thread) && "not a global temporary"); const auto *VD = cast(E->getExtendingDecl()); // If we're not materializing a subobject of the temporary, keep the // cv-qualifiers from the type of the MaterializeTemporaryExpr. QualType MaterializedType = Init->getType(); if (Init == E->getSubExpr()) MaterializedType = E->getType(); CharUnits Align = getContext().getTypeAlignInChars(MaterializedType); auto InsertResult = MaterializedGlobalTemporaryMap.insert({E, nullptr}); if (!InsertResult.second) { // We've seen this before: either we already created it or we're in the // process of doing so. if (!InsertResult.first->second) { // We recursively re-entered this function, probably during emission of // the initializer. Create a placeholder. We'll clean this up in the // outer call, at the end of this function. llvm::Type *Type = getTypes().ConvertTypeForMem(MaterializedType); InsertResult.first->second = new llvm::GlobalVariable( getModule(), Type, false, llvm::GlobalVariable::InternalLinkage, nullptr); } return ConstantAddress(InsertResult.first->second, llvm::cast( InsertResult.first->second->stripPointerCasts()) ->getValueType(), Align); } // FIXME: If an externally-visible declaration extends multiple temporaries, // we need to give each temporary the same name in every translation unit (and // we also need to make the temporaries externally-visible). SmallString<256> Name; llvm::raw_svector_ostream Out(Name); getCXXABI().getMangleContext().mangleReferenceTemporary( VD, E->getManglingNumber(), Out); APValue *Value = nullptr; if (E->getStorageDuration() == SD_Static && VD->evaluateValue()) { // If the initializer of the extending declaration is a constant // initializer, we should have a cached constant initializer for this // temporary. 
Note that this might have a different value from the value // computed by evaluating the initializer if the surrounding constant // expression modifies the temporary. Value = E->getOrCreateValue(false); } // Try evaluating it now, it might have a constant initializer. Expr::EvalResult EvalResult; if (!Value && Init->EvaluateAsRValue(EvalResult, getContext()) && !EvalResult.hasSideEffects()) Value = &EvalResult.Val; LangAS AddrSpace = GetGlobalVarAddressSpace(VD); std::optional emitter; llvm::Constant *InitialValue = nullptr; bool Constant = false; llvm::Type *Type; if (Value) { // The temporary has a constant initializer, use it. emitter.emplace(*this); InitialValue = emitter->emitForInitializer(*Value, AddrSpace, MaterializedType); Constant = MaterializedType.isConstantStorage(getContext(), /*ExcludeCtor*/ Value, /*ExcludeDtor*/ false); Type = InitialValue->getType(); } else { // No initializer, the initialization will be provided when we // initialize the declaration which performed lifetime extension. Type = getTypes().ConvertTypeForMem(MaterializedType); } // Create a global variable for this lifetime-extended temporary. llvm::GlobalValue::LinkageTypes Linkage = getLLVMLinkageVarDefinition(VD); if (Linkage == llvm::GlobalVariable::ExternalLinkage) { const VarDecl *InitVD; if (VD->isStaticDataMember() && VD->getAnyInitializer(InitVD) && isa(InitVD->getLexicalDeclContext())) { // Temporaries defined inside a class get linkonce_odr linkage because the // class can be defined in multiple translation units. Linkage = llvm::GlobalVariable::LinkOnceODRLinkage; } else { // There is no need for this temporary to have external linkage if the // VarDecl has external linkage. Linkage = llvm::GlobalVariable::InternalLinkage; } } auto TargetAS = getContext().getTargetAddressSpace(AddrSpace); auto *GV = new llvm::GlobalVariable( getModule(), Type, Constant, Linkage, InitialValue, Name.c_str(), /*InsertBefore=*/nullptr, llvm::GlobalVariable::NotThreadLocal, TargetAS); if (emitter) emitter->finalize(GV); // Don't assign dllimport or dllexport to local linkage globals. if (!llvm::GlobalValue::isLocalLinkage(Linkage)) { setGVProperties(GV, VD); if (GV->getDLLStorageClass() == llvm::GlobalVariable::DLLExportStorageClass) // The reference temporary should never be dllexport. GV->setDLLStorageClass(llvm::GlobalVariable::DefaultStorageClass); } GV->setAlignment(Align.getAsAlign()); if (supportsCOMDAT() && GV->isWeakForLinker()) GV->setComdat(TheModule.getOrInsertComdat(GV->getName())); if (VD->getTLSKind()) setTLSMode(GV, *VD); llvm::Constant *CV = GV; if (AddrSpace != LangAS::Default) CV = getTargetCodeGenInfo().performAddrSpaceCast( *this, GV, AddrSpace, LangAS::Default, llvm::PointerType::get( getLLVMContext(), getContext().getTargetAddressSpace(LangAS::Default))); // Update the map with the new temporary. If we created a placeholder above, // replace it with the new global now. llvm::Constant *&Entry = MaterializedGlobalTemporaryMap[E]; if (Entry) { Entry->replaceAllUsesWith(CV); llvm::cast(Entry)->eraseFromParent(); } Entry = CV; return ConstantAddress(CV, Type, Align); } /// EmitObjCPropertyImplementations - Emit information for synthesized /// properties for an implementation. void CodeGenModule::EmitObjCPropertyImplementations(const ObjCImplementationDecl *D) { for (const auto *PID : D->property_impls()) { // Dynamic is just for type-checking. 
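// Illustrative sketch (hypothetical C++): lifetime-extended temporaries with
// static storage duration, which GetAddrOfGlobalTemporary above materializes
// as globals (typically with internal linkage, per the logic above).
struct Pair { int first, second; };
const Pair &global_default = Pair{1, 2};        // whole temporary extended
const int  &global_second  = Pair{3, 4}.second; // subobject of a temporary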
if (PID->getPropertyImplementation() == ObjCPropertyImplDecl::Synthesize) { ObjCPropertyDecl *PD = PID->getPropertyDecl(); // Determine which methods need to be implemented, some may have // been overridden. Note that ::isPropertyAccessor is not the method // we want, that just indicates if the decl came from a // property. What we want to know is if the method is defined in // this implementation. auto *Getter = PID->getGetterMethodDecl(); if (!Getter || Getter->isSynthesizedAccessorStub()) CodeGenFunction(*this).GenerateObjCGetter( const_cast(D), PID); auto *Setter = PID->getSetterMethodDecl(); if (!PD->isReadOnly() && (!Setter || Setter->isSynthesizedAccessorStub())) CodeGenFunction(*this).GenerateObjCSetter( const_cast(D), PID); } } } static bool needsDestructMethod(ObjCImplementationDecl *impl) { const ObjCInterfaceDecl *iface = impl->getClassInterface(); for (const ObjCIvarDecl *ivar = iface->all_declared_ivar_begin(); ivar; ivar = ivar->getNextIvar()) if (ivar->getType().isDestructedType()) return true; return false; } static bool AllTrivialInitializers(CodeGenModule &CGM, ObjCImplementationDecl *D) { CodeGenFunction CGF(CGM); for (ObjCImplementationDecl::init_iterator B = D->init_begin(), E = D->init_end(); B != E; ++B) { CXXCtorInitializer *CtorInitExp = *B; Expr *Init = CtorInitExp->getInit(); if (!CGF.isTrivialInitializer(Init)) return false; } return true; } /// EmitObjCIvarInitializations - Emit information for ivar initialization /// for an implementation. void CodeGenModule::EmitObjCIvarInitializations(ObjCImplementationDecl *D) { // We might need a .cxx_destruct even if we don't have any ivar initializers. if (needsDestructMethod(D)) { const IdentifierInfo *II = &getContext().Idents.get(".cxx_destruct"); Selector cxxSelector = getContext().Selectors.getSelector(0, &II); ObjCMethodDecl *DTORMethod = ObjCMethodDecl::Create( getContext(), D->getLocation(), D->getLocation(), cxxSelector, getContext().VoidTy, nullptr, D, /*isInstance=*/true, /*isVariadic=*/false, /*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false, /*isImplicitlyDeclared=*/true, /*isDefined=*/false, ObjCImplementationControl::Required); D->addInstanceMethod(DTORMethod); CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, DTORMethod, false); D->setHasDestructors(true); } // If the implementation doesn't have any ivar initializers, we don't need // a .cxx_construct. if (D->getNumIvarInitializers() == 0 || AllTrivialInitializers(*this, D)) return; const IdentifierInfo *II = &getContext().Idents.get(".cxx_construct"); Selector cxxSelector = getContext().Selectors.getSelector(0, &II); // The constructor returns 'self'. ObjCMethodDecl *CTORMethod = ObjCMethodDecl::Create( getContext(), D->getLocation(), D->getLocation(), cxxSelector, getContext().getObjCIdType(), nullptr, D, /*isInstance=*/true, /*isVariadic=*/false, /*isPropertyAccessor=*/true, /*isSynthesizedAccessorStub=*/false, /*isImplicitlyDeclared=*/true, /*isDefined=*/false, ObjCImplementationControl::Required); D->addInstanceMethod(CTORMethod); CodeGenFunction(*this).GenerateObjCCtorDtorMethod(D, CTORMethod, true); D->setHasNonZeroConstructors(true); } // EmitLinkageSpec - Emit all declarations in a linkage spec. 
void CodeGenModule::EmitLinkageSpec(const LinkageSpecDecl *LSD) { if (LSD->getLanguage() != LinkageSpecLanguageIDs::C && LSD->getLanguage() != LinkageSpecLanguageIDs::CXX) { ErrorUnsupported(LSD, "linkage spec"); return; } EmitDeclContext(LSD); } void CodeGenModule::EmitTopLevelStmt(const TopLevelStmtDecl *D) { // Device code should not be at top level. if (LangOpts.CUDA && LangOpts.CUDAIsDevice) return; std::unique_ptr &CurCGF = GlobalTopLevelStmtBlockInFlight.first; // We emitted a top-level stmt but after it there is initialization. // Stop squashing the top-level stmts into a single function. if (CurCGF && CXXGlobalInits.back() != CurCGF->CurFn) { CurCGF->FinishFunction(D->getEndLoc()); CurCGF = nullptr; } if (!CurCGF) { // void __stmts__N(void) // FIXME: Ask the ABI name mangler to pick a name. std::string Name = "__stmts__" + llvm::utostr(CXXGlobalInits.size()); FunctionArgList Args; QualType RetTy = getContext().VoidTy; const CGFunctionInfo &FnInfo = getTypes().arrangeBuiltinFunctionDeclaration(RetTy, Args); llvm::FunctionType *FnTy = getTypes().GetFunctionType(FnInfo); llvm::Function *Fn = llvm::Function::Create( FnTy, llvm::GlobalValue::InternalLinkage, Name, &getModule()); CurCGF.reset(new CodeGenFunction(*this)); GlobalTopLevelStmtBlockInFlight.second = D; CurCGF->StartFunction(GlobalDecl(), RetTy, Fn, FnInfo, Args, D->getBeginLoc(), D->getBeginLoc()); CXXGlobalInits.push_back(Fn); } CurCGF->EmitStmt(D->getStmt()); } void CodeGenModule::EmitDeclContext(const DeclContext *DC) { for (auto *I : DC->decls()) { // Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope // are themselves considered "top-level", so EmitTopLevelDecl on an // ObjCImplDecl does not recursively visit them. We need to do that in // case they're nested inside another construct (LinkageSpecDecl / // ExportDecl) that does stop them from being considered "top-level". if (auto *OID = dyn_cast(I)) { for (auto *M : OID->methods()) EmitTopLevelDecl(M); } EmitTopLevelDecl(I); } } /// EmitTopLevelDecl - Emit code for a single top level declaration. void CodeGenModule::EmitTopLevelDecl(Decl *D) { // Ignore dependent declarations. if (D->isTemplated()) return; // Consteval function shouldn't be emitted. if (auto *FD = dyn_cast(D); FD && FD->isImmediateFunction()) return; switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: case Decl::Function: EmitGlobal(cast(D)); // Always provide some coverage mapping // even for the functions that aren't emitted. AddDeferredUnusedCoverageMapping(D); break; case Decl::CXXDeductionGuide: // Function-like, but does not result in code emission. break; case Decl::Var: case Decl::Decomposition: case Decl::VarTemplateSpecialization: EmitGlobal(cast(D)); if (auto *DD = dyn_cast(D)) for (auto *B : DD->bindings()) if (auto *HD = B->getHoldingVar()) EmitGlobal(HD); break; // Indirect fields from global anonymous structs and unions can be // ignored; only the actual variable requires IR gen support. 
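// Illustrative sketch (hypothetical C++17 TU): a namespace-scope structured
// binding. The Decl::Decomposition case above emits the DecompositionDecl and
// then each binding's holding variable via EmitGlobal.
#include <utility>
auto [low, high] = std::pair<int, int>{0, 100};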
case Decl::IndirectField: break; // C++ Decls case Decl::Namespace: EmitDeclContext(cast(D)); break; case Decl::ClassTemplateSpecialization: { const auto *Spec = cast(D); if (CGDebugInfo *DI = getModuleDebugInfo()) if (Spec->getSpecializationKind() == TSK_ExplicitInstantiationDefinition && Spec->hasDefinition()) DI->completeTemplateDefinition(*Spec); } [[fallthrough]]; case Decl::CXXRecord: { CXXRecordDecl *CRD = cast(D); if (CGDebugInfo *DI = getModuleDebugInfo()) { if (CRD->hasDefinition()) DI->EmitAndRetainType(getContext().getRecordType(cast(D))); if (auto *ES = D->getASTContext().getExternalSource()) if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) DI->completeUnusedClass(*CRD); } // Emit any static data members, they may be definitions. for (auto *I : CRD->decls()) if (isa(I) || isa(I)) EmitTopLevelDecl(I); break; } // No code generation needed. case Decl::UsingShadow: case Decl::ClassTemplate: case Decl::VarTemplate: case Decl::Concept: case Decl::VarTemplatePartialSpecialization: case Decl::FunctionTemplate: case Decl::TypeAliasTemplate: case Decl::Block: case Decl::Empty: case Decl::Binding: break; case Decl::Using: // using X; [C++] if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitUsingDecl(cast(*D)); break; case Decl::UsingEnum: // using enum X; [C++] if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitUsingEnumDecl(cast(*D)); break; case Decl::NamespaceAlias: if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitNamespaceAlias(cast(*D)); break; case Decl::UsingDirective: // using namespace X; [C++] if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitUsingDirective(cast(*D)); break; case Decl::CXXConstructor: getCXXABI().EmitCXXConstructors(cast(D)); break; case Decl::CXXDestructor: getCXXABI().EmitCXXDestructors(cast(D)); break; case Decl::StaticAssert: // Nothing to do. break; // Objective-C Decls // Forward declarations, no (immediate) code generation. case Decl::ObjCInterface: case Decl::ObjCCategory: break; case Decl::ObjCProtocol: { auto *Proto = cast(D); if (Proto->isThisDeclarationADefinition()) ObjCRuntime->GenerateProtocol(Proto); break; } case Decl::ObjCCategoryImpl: // Categories have properties but don't support synthesize so we // can ignore them here. ObjCRuntime->GenerateCategory(cast(D)); break; case Decl::ObjCImplementation: { auto *OMD = cast(D); EmitObjCPropertyImplementations(OMD); EmitObjCIvarInitializations(OMD); ObjCRuntime->GenerateClass(OMD); // Emit global variable debug information. if (CGDebugInfo *DI = getModuleDebugInfo()) if (getCodeGenOpts().hasReducedDebugInfo()) DI->getOrCreateInterfaceType(getContext().getObjCInterfaceType( OMD->getClassInterface()), OMD->getLocation()); break; } case Decl::ObjCMethod: { auto *OMD = cast(D); // If this is not a prototype, emit the body. if (OMD->getBody()) CodeGenFunction(*this).GenerateObjCMethod(OMD); break; } case Decl::ObjCCompatibleAlias: ObjCRuntime->RegisterAlias(cast(D)); break; case Decl::PragmaComment: { const auto *PCD = cast(D); switch (PCD->getCommentKind()) { case PCK_Unknown: llvm_unreachable("unexpected pragma comment kind"); case PCK_Linker: AppendLinkerOptions(PCD->getArg()); break; case PCK_Lib: AddDependentLib(PCD->getArg()); break; case PCK_Compiler: case PCK_ExeStr: case PCK_User: break; // We ignore all of these. 
} break; } case Decl::PragmaDetectMismatch: { const auto *PDMD = cast(D); AddDetectMismatch(PDMD->getName(), PDMD->getValue()); break; } case Decl::LinkageSpec: EmitLinkageSpec(cast(D)); break; case Decl::FileScopeAsm: { // File-scope asm is ignored during device-side CUDA compilation. if (LangOpts.CUDA && LangOpts.CUDAIsDevice) break; // File-scope asm is ignored during device-side OpenMP compilation. if (LangOpts.OpenMPIsTargetDevice) break; // File-scope asm is ignored during device-side SYCL compilation. if (LangOpts.SYCLIsDevice) break; auto *AD = cast(D); getModule().appendModuleInlineAsm(AD->getAsmString()->getString()); break; } case Decl::TopLevelStmt: EmitTopLevelStmt(cast(D)); break; case Decl::Import: { auto *Import = cast(D); // If we've already imported this module, we're done. if (!ImportedModules.insert(Import->getImportedModule())) break; // Emit debug information for direct imports. if (!Import->getImportedOwningModule()) { if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitImportDecl(*Import); } // For C++ standard modules we are done - we will call the module // initializer for imported modules, and that will likewise call those for // any imports it has. if (CXX20ModuleInits && Import->getImportedOwningModule() && !Import->getImportedOwningModule()->isModuleMapModule()) break; // For clang C++ module map modules the initializers for sub-modules are // emitted here. // Find all of the submodules and emit the module initializers. llvm::SmallPtrSet Visited; SmallVector Stack; Visited.insert(Import->getImportedModule()); Stack.push_back(Import->getImportedModule()); while (!Stack.empty()) { clang::Module *Mod = Stack.pop_back_val(); if (!EmittedModuleInitializers.insert(Mod).second) continue; for (auto *D : Context.getModuleInitializers(Mod)) EmitTopLevelDecl(D); // Visit the submodules of this module. for (auto *Submodule : Mod->submodules()) { // Skip explicit children; they need to be explicitly imported to emit // the initializers. if (Submodule->IsExplicit) continue; if (Visited.insert(Submodule).second) Stack.push_back(Submodule); } } break; } case Decl::Export: EmitDeclContext(cast(D)); break; case Decl::OMPThreadPrivate: EmitOMPThreadPrivateDecl(cast(D)); break; case Decl::OMPAllocate: EmitOMPAllocateDecl(cast(D)); break; case Decl::OMPDeclareReduction: EmitOMPDeclareReduction(cast(D)); break; case Decl::OMPDeclareMapper: EmitOMPDeclareMapper(cast(D)); break; case Decl::OMPRequires: EmitOMPRequiresDecl(cast(D)); break; case Decl::Typedef: case Decl::TypeAlias: // using foo = bar; [C++11] if (CGDebugInfo *DI = getModuleDebugInfo()) DI->EmitAndRetainType( getContext().getTypedefType(cast(D))); break; case Decl::Record: if (CGDebugInfo *DI = getModuleDebugInfo()) if (cast(D)->getDefinition()) DI->EmitAndRetainType(getContext().getRecordType(cast(D))); break; case Decl::Enum: if (CGDebugInfo *DI = getModuleDebugInfo()) if (cast(D)->getDefinition()) DI->EmitAndRetainType(getContext().getEnumType(cast(D))); break; case Decl::HLSLBuffer: getHLSLRuntime().addBuffer(cast(D)); break; default: // Make sure we handled everything we should, every other kind is a // non-top-level decl. FIXME: Would be nice to have an isTopLevelDeclKind // function. Need to recode Decl::Kind to do that easily. assert(isa(D) && "Unsupported decl kind"); break; } } void CodeGenModule::AddDeferredUnusedCoverageMapping(Decl *D) { // Do we need to generate coverage mapping? 
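// Illustrative sketch (hypothetical TU; #pragma comment assumes
// -fms-extensions or a Windows target): inputs to the Decl::PragmaComment and
// Decl::FileScopeAsm cases above. The asm string is appended to the module's
// inline asm unless this is a CUDA/OpenMP/SYCL device-side compile.
#pragma comment(lib, "m")                      // PCK_Lib -> AddDependentLib
#pragma comment(linker, "/include:init_hook")  // PCK_Linker -> AppendLinkerOptions
__asm__(".globl build_marker\nbuild_marker:");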
if (!CodeGenOpts.CoverageMapping) return; switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: case Decl::Function: case Decl::ObjCMethod: case Decl::CXXConstructor: case Decl::CXXDestructor: { if (!cast(D)->doesThisDeclarationHaveABody()) break; SourceManager &SM = getContext().getSourceManager(); if (LimitedCoverage && SM.getMainFileID() != SM.getFileID(D->getBeginLoc())) break; if (!llvm::coverage::SystemHeadersCoverage && SM.isInSystemHeader(D->getBeginLoc())) break; DeferredEmptyCoverageMappingDecls.try_emplace(D, true); break; } default: break; }; } void CodeGenModule::ClearUnusedCoverageMapping(const Decl *D) { // Do we need to generate coverage mapping? if (!CodeGenOpts.CoverageMapping) return; if (const auto *Fn = dyn_cast(D)) { if (Fn->isTemplateInstantiation()) ClearUnusedCoverageMapping(Fn->getTemplateInstantiationPattern()); } DeferredEmptyCoverageMappingDecls.insert_or_assign(D, false); } void CodeGenModule::EmitDeferredUnusedCoverageMappings() { // We call takeVector() here to avoid use-after-free. // FIXME: DeferredEmptyCoverageMappingDecls is getting mutated because // we deserialize function bodies to emit coverage info for them, and that // deserializes more declarations. How should we handle that case? for (const auto &Entry : DeferredEmptyCoverageMappingDecls.takeVector()) { if (!Entry.second) continue; const Decl *D = Entry.first; switch (D->getKind()) { case Decl::CXXConversion: case Decl::CXXMethod: case Decl::Function: case Decl::ObjCMethod: { CodeGenPGO PGO(*this); GlobalDecl GD(cast(D)); PGO.emitEmptyCounterMapping(D, getMangledName(GD), getFunctionLinkage(GD)); break; } case Decl::CXXConstructor: { CodeGenPGO PGO(*this); GlobalDecl GD(cast(D), Ctor_Base); PGO.emitEmptyCounterMapping(D, getMangledName(GD), getFunctionLinkage(GD)); break; } case Decl::CXXDestructor: { CodeGenPGO PGO(*this); GlobalDecl GD(cast(D), Dtor_Base); PGO.emitEmptyCounterMapping(D, getMangledName(GD), getFunctionLinkage(GD)); break; } default: break; }; } } void CodeGenModule::EmitMainVoidAlias() { // In order to transition away from "__original_main" gracefully, emit an // alias for "main" in the no-argument case so that libc can detect when // new-style no-argument main is in used. if (llvm::Function *F = getModule().getFunction("main")) { if (!F->isDeclaration() && F->arg_size() == 0 && !F->isVarArg() && F->getReturnType()->isIntegerTy(Context.getTargetInfo().getIntWidth())) { auto *GA = llvm::GlobalAlias::create("__main_void", F); GA->setVisibility(llvm::GlobalValue::HiddenVisibility); } } } /// Turns the given pointer into a constant. static llvm::Constant *GetPointerConstant(llvm::LLVMContext &Context, const void *Ptr) { uintptr_t PtrInt = reinterpret_cast(Ptr); llvm::Type *i64 = llvm::Type::getInt64Ty(Context); return llvm::ConstantInt::get(i64, PtrInt); } static void EmitGlobalDeclMetadata(CodeGenModule &CGM, llvm::NamedMDNode *&GlobalMetadata, GlobalDecl D, llvm::GlobalValue *Addr) { if (!GlobalMetadata) GlobalMetadata = CGM.getModule().getOrInsertNamedMetadata("clang.global.decl.ptrs"); // TODO: should we report variant information for ctors/dtors? llvm::Metadata *Ops[] = {llvm::ConstantAsMetadata::get(Addr), llvm::ConstantAsMetadata::get(GetPointerConstant( CGM.getLLVMContext(), D.getDecl()))}; GlobalMetadata->addOperand(llvm::MDNode::get(CGM.getLLVMContext(), Ops)); } bool CodeGenModule::CheckAndReplaceExternCIFuncs(llvm::GlobalValue *Elem, llvm::GlobalValue *CppFunc) { // Store the list of ifuncs we need to replace uses in. 
llvm::SmallVector IFuncs; // List of ConstantExprs that we should be able to delete when we're done // here. llvm::SmallVector CEs; // It isn't valid to replace the extern-C ifuncs if all we find is itself! if (Elem == CppFunc) return false; // First make sure that all users of this are ifuncs (or ifuncs via a // bitcast), and collect the list of ifuncs and CEs so we can work on them // later. for (llvm::User *User : Elem->users()) { // Users can either be a bitcast ConstExpr that is used by the ifuncs, OR an // ifunc directly. In any other case, just give up, as we don't know what we // could break by changing those. if (auto *ConstExpr = dyn_cast(User)) { if (ConstExpr->getOpcode() != llvm::Instruction::BitCast) return false; for (llvm::User *CEUser : ConstExpr->users()) { if (auto *IFunc = dyn_cast(CEUser)) { IFuncs.push_back(IFunc); } else { return false; } } CEs.push_back(ConstExpr); } else if (auto *IFunc = dyn_cast(User)) { IFuncs.push_back(IFunc); } else { // This user is one we don't know how to handle, so fail redirection. This // will result in an ifunc retaining a resolver name that will ultimately // fail to be resolved to a defined function. return false; } } // Now we know this is a valid case where we can do this alias replacement, we // need to remove all of the references to Elem (and the bitcasts!) so we can // delete it. for (llvm::GlobalIFunc *IFunc : IFuncs) IFunc->setResolver(nullptr); for (llvm::ConstantExpr *ConstExpr : CEs) ConstExpr->destroyConstant(); // We should now be out of uses for the 'old' version of this function, so we // can erase it as well. Elem->eraseFromParent(); for (llvm::GlobalIFunc *IFunc : IFuncs) { // The type of the resolver is always just a function-type that returns the // type of the IFunc, so create that here. If the type of the actual // resolver doesn't match, it just gets bitcast to the right thing. auto *ResolverTy = llvm::FunctionType::get(IFunc->getType(), /*isVarArg*/ false); llvm::Constant *Resolver = GetOrCreateLLVMFunction( CppFunc->getName(), ResolverTy, {}, /*ForVTable*/ false); IFunc->setResolver(Resolver); } return true; } /// For each function which is declared within an extern "C" region and marked /// as 'used', but has internal linkage, create an alias from the unmangled /// name to the mangled name if possible. People expect to be able to refer /// to such functions with an unmangled name from inline assembly within the /// same translation unit. void CodeGenModule::EmitStaticExternCAliases() { if (!getTargetCodeGenInfo().shouldEmitStaticExternCAliases()) return; for (auto &I : StaticExternCValues) { const IdentifierInfo *Name = I.first; llvm::GlobalValue *Val = I.second; // If Val is null, that implies there were multiple declarations that each // had a claim to the unmangled name. In this case, generation of the alias // is suppressed. See CodeGenModule::MaybeHandleStaticInExternC. if (!Val) break; llvm::GlobalValue *ExistingElem = getModule().getNamedValue(Name->getName()); // If there is either not something already by this name, or we were able to // replace all uses from IFuncs, create the alias. 
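// Illustrative sketch (hypothetical TU): an internal-linkage function inside
// an extern "C" block, kept alive with __attribute__((used)) so inline asm can
// name it; EmitStaticExternCAliases below adds an alias from the unmangled
// name to the internal (mangled) symbol.
extern "C" {
__attribute__((used)) static int helper(void) { return 7; }
}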
if (!ExistingElem || CheckAndReplaceExternCIFuncs(ExistingElem, Val)) addCompilerUsedGlobal(llvm::GlobalAlias::create(Name->getName(), Val)); } } bool CodeGenModule::lookupRepresentativeDecl(StringRef MangledName, GlobalDecl &Result) const { auto Res = Manglings.find(MangledName); if (Res == Manglings.end()) return false; Result = Res->getValue(); return true; } /// Emits metadata nodes associating all the global values in the /// current module with the Decls they came from. This is useful for /// projects using IR gen as a subroutine. /// /// Since there's currently no way to associate an MDNode directly /// with an llvm::GlobalValue, we create a global named metadata /// with the name 'clang.global.decl.ptrs'. void CodeGenModule::EmitDeclMetadata() { llvm::NamedMDNode *GlobalMetadata = nullptr; for (auto &I : MangledDeclNames) { llvm::GlobalValue *Addr = getModule().getNamedValue(I.second); // Some mangled names don't necessarily have an associated GlobalValue // in this module, e.g. if we mangled it for DebugInfo. if (Addr) EmitGlobalDeclMetadata(*this, GlobalMetadata, I.first, Addr); } } /// Emits metadata nodes for all the local variables in the current /// function. void CodeGenFunction::EmitDeclMetadata() { if (LocalDeclMap.empty()) return; llvm::LLVMContext &Context = getLLVMContext(); // Find the unique metadata ID for this name. unsigned DeclPtrKind = Context.getMDKindID("clang.decl.ptr"); llvm::NamedMDNode *GlobalMetadata = nullptr; for (auto &I : LocalDeclMap) { const Decl *D = I.first; llvm::Value *Addr = I.second.emitRawPointer(*this); if (auto *Alloca = dyn_cast(Addr)) { llvm::Value *DAddr = GetPointerConstant(getLLVMContext(), D); Alloca->setMetadata( DeclPtrKind, llvm::MDNode::get( Context, llvm::ValueAsMetadata::getConstant(DAddr))); } else if (auto *GV = dyn_cast(Addr)) { GlobalDecl GD = GlobalDecl(cast(D)); EmitGlobalDeclMetadata(CGM, GlobalMetadata, GD, GV); } } } void CodeGenModule::EmitVersionIdentMetadata() { llvm::NamedMDNode *IdentMetadata = TheModule.getOrInsertNamedMetadata("llvm.ident"); std::string Version = getClangFullVersion(); llvm::LLVMContext &Ctx = TheModule.getContext(); llvm::Metadata *IdentNode[] = {llvm::MDString::get(Ctx, Version)}; IdentMetadata->addOperand(llvm::MDNode::get(Ctx, IdentNode)); } void CodeGenModule::EmitCommandLineMetadata() { llvm::NamedMDNode *CommandLineMetadata = TheModule.getOrInsertNamedMetadata("llvm.commandline"); std::string CommandLine = getCodeGenOpts().RecordCommandLine; llvm::LLVMContext &Ctx = TheModule.getContext(); llvm::Metadata *CommandLineNode[] = {llvm::MDString::get(Ctx, CommandLine)}; CommandLineMetadata->addOperand(llvm::MDNode::get(Ctx, CommandLineNode)); } void CodeGenModule::EmitCoverageFile() { llvm::NamedMDNode *CUNode = TheModule.getNamedMetadata("llvm.dbg.cu"); if (!CUNode) return; llvm::NamedMDNode *GCov = TheModule.getOrInsertNamedMetadata("llvm.gcov"); llvm::LLVMContext &Ctx = TheModule.getContext(); auto *CoverageDataFile = llvm::MDString::get(Ctx, getCodeGenOpts().CoverageDataFile); auto *CoverageNotesFile = llvm::MDString::get(Ctx, getCodeGenOpts().CoverageNotesFile); for (int i = 0, e = CUNode->getNumOperands(); i != e; ++i) { llvm::MDNode *CU = CUNode->getOperand(i); llvm::Metadata *Elts[] = {CoverageNotesFile, CoverageDataFile, CU}; GCov->addOperand(llvm::MDNode::get(Ctx, Elts)); } } llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH) { // Return a bogus pointer if RTTI is disabled, unless it's for EH. 
// FIXME: should we even be calling this method if RTTI is disabled // and it's not for EH? if (!shouldEmitRTTI(ForEH)) return llvm::Constant::getNullValue(GlobalsInt8PtrTy); if (ForEH && Ty->isObjCObjectPointerType() && LangOpts.ObjCRuntime.isGNUFamily()) return ObjCRuntime->GetEHType(Ty); return getCXXABI().getAddrOfRTTIDescriptor(Ty); } void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) { // Do not emit threadprivates in simd-only mode. if (LangOpts.OpenMP && LangOpts.OpenMPSimd) return; for (auto RefExpr : D->varlists()) { auto *VD = cast(cast(RefExpr)->getDecl()); bool PerformInit = VD->getAnyInitializer() && !VD->getAnyInitializer()->isConstantInitializer(getContext(), /*ForRef=*/false); Address Addr(GetAddrOfGlobalVar(VD), getTypes().ConvertTypeForMem(VD->getType()), getContext().getDeclAlign(VD)); if (auto InitFunction = getOpenMPRuntime().emitThreadPrivateVarDefinition( VD, Addr, RefExpr->getBeginLoc(), PerformInit)) CXXGlobalInits.push_back(InitFunction); } } llvm::Metadata * CodeGenModule::CreateMetadataIdentifierImpl(QualType T, MetadataTypeMap &Map, StringRef Suffix) { if (auto *FnType = T->getAs()) T = getContext().getFunctionType( FnType->getReturnType(), FnType->getParamTypes(), FnType->getExtProtoInfo().withExceptionSpec(EST_None)); llvm::Metadata *&InternalId = Map[T.getCanonicalType()]; if (InternalId) return InternalId; if (isExternallyVisible(T->getLinkage())) { std::string OutName; llvm::raw_string_ostream Out(OutName); getCXXABI().getMangleContext().mangleCanonicalTypeName( T, Out, getCodeGenOpts().SanitizeCfiICallNormalizeIntegers); if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers) Out << ".normalized"; Out << Suffix; InternalId = llvm::MDString::get(getLLVMContext(), Out.str()); } else { InternalId = llvm::MDNode::getDistinct(getLLVMContext(), llvm::ArrayRef()); } return InternalId; } llvm::Metadata *CodeGenModule::CreateMetadataIdentifierForType(QualType T) { return CreateMetadataIdentifierImpl(T, MetadataIdMap, ""); } llvm::Metadata * CodeGenModule::CreateMetadataIdentifierForVirtualMemPtrType(QualType T) { return CreateMetadataIdentifierImpl(T, VirtualMetadataIdMap, ".virtual"); } // Generalize pointer types to a void pointer with the qualifiers of the // originally pointed-to type, e.g. 'const char *' and 'char * const *' // generalize to 'const void *' while 'char *' and 'const char **' generalize to // 'void *'. static QualType GeneralizeType(ASTContext &Ctx, QualType Ty) { if (!Ty->isPointerType()) return Ty; return Ctx.getPointerType( QualType(Ctx.VoidTy).withCVRQualifiers( Ty->getPointeeType().getCVRQualifiers())); } // Apply type generalization to a FunctionType's return and argument types static QualType GeneralizeFunctionType(ASTContext &Ctx, QualType Ty) { if (auto *FnType = Ty->getAs()) { SmallVector GeneralizedParams; for (auto &Param : FnType->param_types()) GeneralizedParams.push_back(GeneralizeType(Ctx, Param)); return Ctx.getFunctionType( GeneralizeType(Ctx, FnType->getReturnType()), GeneralizedParams, FnType->getExtProtoInfo()); } if (auto *FnType = Ty->getAs()) return Ctx.getFunctionNoProtoType( GeneralizeType(Ctx, FnType->getReturnType())); llvm_unreachable("Encountered unknown FunctionType"); } llvm::Metadata *CodeGenModule::CreateMetadataIdentifierGeneralized(QualType T) { return CreateMetadataIdentifierImpl(GeneralizeFunctionType(getContext(), T), GeneralizedMetadataIdMap, ".generalized"); } /// Returns whether this module needs the "all-vtables" type identifier. 
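// --- Editor's sketch (not part of this patch): what the pointer-type
// generalization above buys at the source level. Assuming a build with
// -fsanitize=cfi-icall -fsanitize-cfi-icall-generalize-pointers, both callees
// below end up under the same ".generalized" type id, because each signature
// reduces to void(const void *). Names are illustrative. ---
void log_bytes(const char *p);
void log_words(const int *p);
typedef void (*logger_t)(const char *);
void run_logger(logger_t fn, const char *data) {
  fn(data); // indirect call checked against the generalized type id
}
// --- end sketch ---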
bool CodeGenModule::NeedAllVtablesTypeId() const { // Returns true if at least one of vtable-based CFI checkers is enabled and // is not in the trapping mode. return ((LangOpts.Sanitize.has(SanitizerKind::CFIVCall) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIVCall)) || (LangOpts.Sanitize.has(SanitizerKind::CFINVCall) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFINVCall)) || (LangOpts.Sanitize.has(SanitizerKind::CFIDerivedCast) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIDerivedCast)) || (LangOpts.Sanitize.has(SanitizerKind::CFIUnrelatedCast) && !CodeGenOpts.SanitizeTrap.has(SanitizerKind::CFIUnrelatedCast))); } void CodeGenModule::AddVTableTypeMetadata(llvm::GlobalVariable *VTable, CharUnits Offset, const CXXRecordDecl *RD) { llvm::Metadata *MD = CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0)); VTable->addTypeMetadata(Offset.getQuantity(), MD); if (CodeGenOpts.SanitizeCfiCrossDso) if (auto CrossDsoTypeId = CreateCrossDsoCfiTypeId(MD)) VTable->addTypeMetadata(Offset.getQuantity(), llvm::ConstantAsMetadata::get(CrossDsoTypeId)); if (NeedAllVtablesTypeId()) { llvm::Metadata *MD = llvm::MDString::get(getLLVMContext(), "all-vtables"); VTable->addTypeMetadata(Offset.getQuantity(), MD); } } llvm::SanitizerStatReport &CodeGenModule::getSanStats() { if (!SanStats) SanStats = std::make_unique(&getModule()); return *SanStats; } llvm::Value * CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E, CodeGenFunction &CGF) { llvm::Constant *C = ConstantEmitter(CGF).emitAbstract(E, E->getType()); auto *SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr()); auto *FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false); auto *Call = CGF.EmitRuntimeCall( CreateRuntimeFunction(FTy, "__translate_sampler_initializer"), {C}); return Call; } CharUnits CodeGenModule::getNaturalPointeeTypeAlignment( QualType T, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo) { return getNaturalTypeAlignment(T->getPointeeType(), BaseInfo, TBAAInfo, /* forPointeeType= */ true); } CharUnits CodeGenModule::getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo, TBAAAccessInfo *TBAAInfo, bool forPointeeType) { if (TBAAInfo) *TBAAInfo = getTBAAAccessInfo(T); // FIXME: This duplicates logic in ASTContext::getTypeAlignIfKnown. But // that doesn't return the information we need to compute BaseInfo. // Honor alignment typedef attributes even on incomplete types. // We also honor them straight for C++ class types, even as pointees; // there's an expressivity gap here. if (auto TT = T->getAs()) { if (auto Align = TT->getDecl()->getMaxAlignment()) { if (BaseInfo) *BaseInfo = LValueBaseInfo(AlignmentSource::AttributedType); return getContext().toCharUnitsFromBits(Align); } } bool AlignForArray = T->isArrayType(); // Analyze the base element type, so we don't get confused by incomplete // array types. T = getContext().getBaseElementType(T); if (T->isIncompleteType()) { // We could try to replicate the logic from // ASTContext::getTypeAlignIfKnown, but nothing uses the alignment if the // type is incomplete, so it's impossible to test. We could try to reuse // getTypeAlignIfKnown, but that doesn't return the information we need // to set BaseInfo. So just ignore the possibility that the alignment is // greater than one. 
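// --- Editor's sketch (not part of this patch): the "honor alignment typedef
// attributes even on incomplete types" rule above, at the source level. The
// type names are made up for illustration. ---
struct Opaque; // incomplete type
typedef struct Opaque AlignedOpaque __attribute__((aligned(16)));
void consume(AlignedOpaque *p);
// Accesses formed through 'AlignedOpaque *' may assume 16-byte alignment, and
// the alignment source is reported as AttributedType rather than Type.
// --- end sketch ---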
if (BaseInfo) *BaseInfo = LValueBaseInfo(AlignmentSource::Type); return CharUnits::One(); } if (BaseInfo) *BaseInfo = LValueBaseInfo(AlignmentSource::Type); CharUnits Alignment; const CXXRecordDecl *RD; if (T.getQualifiers().hasUnaligned()) { Alignment = CharUnits::One(); } else if (forPointeeType && !AlignForArray && (RD = T->getAsCXXRecordDecl())) { // For C++ class pointees, we don't know whether we're pointing at a // base or a complete object, so we generally need to use the // non-virtual alignment. Alignment = getClassPointerAlignment(RD); } else { Alignment = getContext().getTypeAlignInChars(T); } // Cap to the global maximum type alignment unless the alignment // was somehow explicit on the type. if (unsigned MaxAlign = getLangOpts().MaxTypeAlign) { if (Alignment.getQuantity() > MaxAlign && !getContext().isAlignmentRequired(T)) Alignment = CharUnits::fromQuantity(MaxAlign); } return Alignment; } bool CodeGenModule::stopAutoInit() { unsigned StopAfter = getContext().getLangOpts().TrivialAutoVarInitStopAfter; if (StopAfter) { // This number is positive only when -ftrivial-auto-var-init-stop-after=* is // used if (NumAutoVarInit >= StopAfter) { return true; } if (!NumAutoVarInit) { unsigned DiagID = getDiags().getCustomDiagID( DiagnosticsEngine::Warning, "-ftrivial-auto-var-init-stop-after=%0 has been enabled to limit the " "number of times ftrivial-auto-var-init=%1 gets applied."); getDiags().Report(DiagID) << StopAfter << (getContext().getLangOpts().getTrivialAutoVarInit() == LangOptions::TrivialAutoVarInitKind::Zero ? "zero" : "pattern"); } ++NumAutoVarInit; } return false; } void CodeGenModule::printPostfixForExternalizedDecl(llvm::raw_ostream &OS, const Decl *D) const { // ptxas does not allow '.' in symbol names. On the other hand, HIP prefers // postfix beginning with '.' since the symbol name can be demangled. if (LangOpts.HIP) OS << (isa(D) ? ".static." : ".intern."); else OS << (isa(D) ? "__static__" : "__intern__"); // If the CUID is not specified we try to generate a unique postfix. if (getLangOpts().CUID.empty()) { SourceManager &SM = getContext().getSourceManager(); PresumedLoc PLoc = SM.getPresumedLoc(D->getLocation()); assert(PLoc.isValid() && "Source location is expected to be valid."); // Get the hash of the user defined macros. llvm::MD5 Hash; llvm::MD5::MD5Result Result; for (const auto &Arg : PreprocessorOpts.Macros) Hash.update(Arg.first); Hash.final(Result); // Get the UniqueID for the file containing the decl. 
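// --- Editor's sketch (not part of this patch): the behaviour stopAutoInit above
// gates, seen from user code. Assuming a TU compiled with
// -ftrivial-auto-var-init=pattern -ftrivial-auto-var-init-stop-after=1, only the
// first automatic variable below receives an implicit initializer; the second is
// left alone and the one-time warning above is emitted. Names are illustrative. ---
int observe(int);
int demo(void) {
  int first;  // auto-initialized (counts toward the stop-after limit)
  int second; // past the limit: no implicit initialization
  return observe(first) + observe(second);
}
// --- end sketch ---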
    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(D->getLocation(), /*UseLineDirectives=*/false);
      assert(PLoc.isValid() && "Source location is expected to be valid.");
      if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
        SM.getDiagnostics().Report(diag::err_cannot_open_file)
            << PLoc.getFilename() << EC.message();
    }
    OS << llvm::format("%x", ID.getFile()) << llvm::format("%x", ID.getDevice())
       << "_" << llvm::utohexstr(Result.low(), /*LowerCase=*/true, /*Width=*/8);
  } else {
    OS << getContext().getCUIDHash();
  }
}

void CodeGenModule::moveLazyEmissionStates(CodeGenModule *NewBuilder) {
  assert(DeferredDeclsToEmit.empty() &&
         "Should have emitted all decls deferred to emit.");
  assert(NewBuilder->DeferredDecls.empty() &&
         "Newly created module should not have deferred decls");
  NewBuilder->DeferredDecls = std::move(DeferredDecls);
  assert(EmittedDeferredDecls.empty() &&
         "Still have (unmerged) EmittedDeferredDecls deferred decls");
  assert(NewBuilder->DeferredVTables.empty() &&
         "Newly created module should not have deferred vtables");
  NewBuilder->DeferredVTables = std::move(DeferredVTables);
  assert(NewBuilder->MangledDeclNames.empty() &&
         "Newly created module should not have mangled decl names");
  assert(NewBuilder->Manglings.empty() &&
         "Newly created module should not have manglings");
  NewBuilder->Manglings = std::move(Manglings);
  NewBuilder->WeakRefReferences = std::move(WeakRefReferences);
  NewBuilder->ABI->MangleCtx = std::move(ABI->MangleCtx);
}
diff --git a/contrib/llvm-project/clang/lib/CodeGen/Targets/ARM.cpp b/contrib/llvm-project/clang/lib/CodeGen/Targets/ARM.cpp
index d032b88d7683..457d761039a0 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/Targets/ARM.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/Targets/ARM.cpp
@@ -1,811 +1,814 @@
//===- ARM.cpp ------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "ABIInfoImpl.h" #include "TargetInfo.h" using namespace clang; using namespace clang::CodeGen; //===----------------------------------------------------------------------===// // ARM ABI Implementation //===----------------------------------------------------------------------===// namespace { class ARMABIInfo : public ABIInfo { ARMABIKind Kind; bool IsFloatABISoftFP; public: ARMABIInfo(CodeGenTypes &CGT, ARMABIKind Kind) : ABIInfo(CGT), Kind(Kind) { setCCs(); IsFloatABISoftFP = CGT.getCodeGenOpts().FloatABI == "softfp" || CGT.getCodeGenOpts().FloatABI == ""; // default } bool isEABI() const { switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::Android: case llvm::Triple::EABI: case llvm::Triple::EABIHF: case llvm::Triple::GNUEABI: + case llvm::Triple::GNUEABIT64: case llvm::Triple::GNUEABIHF: + case llvm::Triple::GNUEABIHFT64: case llvm::Triple::MuslEABI: case llvm::Triple::MuslEABIHF: return true; default: return getTarget().getTriple().isOHOSFamily(); } } bool isEABIHF() const { switch (getTarget().getTriple().getEnvironment()) { case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: + case llvm::Triple::GNUEABIHFT64: case llvm::Triple::MuslEABIHF: return true; default: return false; } } ARMABIKind getABIKind() const { return Kind; } bool allowBFloatArgsAndRet() const override { return !IsFloatABISoftFP && getTarget().hasBFloat16Type(); } private: ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic, unsigned functionCallConv) const; ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic, unsigned functionCallConv) const; ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base, uint64_t Members) const; ABIArgInfo coerceIllegalVector(QualType Ty) const; bool isIllegalVectorType(QualType Ty) const; bool containsAnyFP16Vectors(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override; bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const; void computeInfo(CGFunctionInfo &FI) const override; RValue EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, AggValueSlot Slot) const override; llvm::CallingConv::ID getLLVMDefaultCC() const; llvm::CallingConv::ID getABIDefaultCC() const; void setCCs(); }; class ARMSwiftABIInfo : public SwiftABIInfo { public: explicit ARMSwiftABIInfo(CodeGenTypes &CGT) : SwiftABIInfo(CGT, /*SwiftErrorInRegister=*/true) {} bool isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, unsigned NumElts) const override; }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { public: ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIKind K) : TargetCodeGenInfo(std::make_unique(CGT, K)) { SwiftInfo = std::make_unique(CGT); } int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override { return 13; } StringRef getARCRetainAutoreleasedReturnValueMarker() const override { return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue"; } bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const override { llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4); // 0-15 are the 16 integer registers. 
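// --- Editor's sketch (not part of this patch): the environment tests above in
// isolation, including the GNUEABIHFT64 case this patch adds. The helper name
// and the triples in the trailing comments are illustrative only. ---
#include "llvm/TargetParser/Triple.h"
static bool isHardFloatEnv(llvm::StringRef TripleStr) {
  switch (llvm::Triple(TripleStr).getEnvironment()) {
  case llvm::Triple::EABIHF:
  case llvm::Triple::GNUEABIHF:
  case llvm::Triple::GNUEABIHFT64:
  case llvm::Triple::MuslEABIHF:
    return true;
  default:
    return false;
  }
}
// isHardFloatEnv("armv7-unknown-linux-gnueabihf") -> true
// isHardFloatEnv("armv7-unknown-linux-gnueabi")   -> false
// --- end sketch ---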
AssignToArrayRange(CGF.Builder, Address, Four8, 0, 15); return false; } unsigned getSizeOfUnwindException() const override { if (getABIInfo().isEABI()) return 88; return TargetCodeGenInfo::getSizeOfUnwindException(); } void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override { if (GV->isDeclaration()) return; const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; auto *Fn = cast(GV); if (const auto *TA = FD->getAttr()) { ParsedTargetAttr Attr = CGM.getTarget().parseTargetAttr(TA->getFeaturesStr()); if (!Attr.BranchProtection.empty()) { TargetInfo::BranchProtectionInfo BPI{}; StringRef DiagMsg; StringRef Arch = Attr.CPU.empty() ? CGM.getTarget().getTargetOpts().CPU : Attr.CPU; if (!CGM.getTarget().validateBranchProtection(Attr.BranchProtection, Arch, BPI, DiagMsg)) { CGM.getDiags().Report( D->getLocation(), diag::warn_target_unsupported_branch_protection_attribute) << Arch; } else setBranchProtectionFnAttributes(BPI, (*Fn)); } else if (CGM.getLangOpts().BranchTargetEnforcement || CGM.getLangOpts().hasSignReturnAddress()) { // If the Branch Protection attribute is missing, validate the target // Architecture attribute against Branch Protection command line // settings. if (!CGM.getTarget().isBranchProtectionSupportedArch(Attr.CPU)) CGM.getDiags().Report( D->getLocation(), diag::warn_target_unsupported_branch_protection_attribute) << Attr.CPU; } } else if (CGM.getTarget().isBranchProtectionSupportedArch( CGM.getTarget().getTargetOpts().CPU)) { TargetInfo::BranchProtectionInfo BPI(CGM.getLangOpts()); setBranchProtectionFnAttributes(BPI, (*Fn)); } const ARMInterruptAttr *Attr = FD->getAttr(); if (!Attr) return; const char *Kind; switch (Attr->getInterrupt()) { case ARMInterruptAttr::Generic: Kind = ""; break; case ARMInterruptAttr::IRQ: Kind = "IRQ"; break; case ARMInterruptAttr::FIQ: Kind = "FIQ"; break; case ARMInterruptAttr::SWI: Kind = "SWI"; break; case ARMInterruptAttr::ABORT: Kind = "ABORT"; break; case ARMInterruptAttr::UNDEF: Kind = "UNDEF"; break; } Fn->addFnAttr("interrupt", Kind); ARMABIKind ABI = getABIInfo().getABIKind(); if (ABI == ARMABIKind::APCS) return; // AAPCS guarantees that sp will be 8-byte aligned on any public interface, // however this is not necessarily true on taking any interrupt. Instruct // the backend to perform a realignment as part of the function prologue. 
llvm::AttrBuilder B(Fn->getContext()); B.addStackAlignmentAttr(8); Fn->addFnAttrs(B); } }; class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo { public: WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIKind K) : ARMTargetCodeGenInfo(CGT, K) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib); } void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value, llvm::SmallString<32> &Opt) const override { Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\""; } }; void WindowsARMTargetCodeGenInfo::setTargetAttributes( const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const { ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM); if (GV->isDeclaration()) return; addStackProbeTargetAttributes(D, GV, CGM); } } void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { if (!::classifyReturnType(getCXXABI(), FI, *this)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(), FI.getCallingConvention()); for (auto &I : FI.arguments()) I.info = classifyArgumentType(I.type, FI.isVariadic(), FI.getCallingConvention()); // Always honor user-specified calling convention. if (FI.getCallingConvention() != llvm::CallingConv::C) return; llvm::CallingConv::ID cc = getRuntimeCC(); if (cc != llvm::CallingConv::C) FI.setEffectiveCallingConvention(cc); } /// Return the default calling convention that LLVM will use. llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const { // The default calling convention that LLVM will infer. if (isEABIHF() || getTarget().getTriple().isWatchABI()) return llvm::CallingConv::ARM_AAPCS_VFP; else if (isEABI()) return llvm::CallingConv::ARM_AAPCS; else return llvm::CallingConv::ARM_APCS; } /// Return the calling convention that our ABI would like us to use /// as the C calling convention. llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const { switch (getABIKind()) { case ARMABIKind::APCS: return llvm::CallingConv::ARM_APCS; case ARMABIKind::AAPCS: return llvm::CallingConv::ARM_AAPCS; case ARMABIKind::AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; case ARMABIKind::AAPCS16_VFP: return llvm::CallingConv::ARM_AAPCS_VFP; } llvm_unreachable("bad ABI kind"); } void ARMABIInfo::setCCs() { assert(getRuntimeCC() == llvm::CallingConv::C); // Don't muddy up the IR with a ton of explicit annotations if // they'd just match what LLVM will infer from the triple. llvm::CallingConv::ID abiCC = getABIDefaultCC(); if (abiCC != getLLVMDefaultCC()) RuntimeCC = abiCC; } ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const { uint64_t Size = getContext().getTypeSize(Ty); if (Size <= 32) { llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); return ABIArgInfo::getDirect(ResType); } if (Size == 64 || Size == 128) { auto *ResType = llvm::FixedVectorType::get( llvm::Type::getInt32Ty(getVMContext()), Size / 32); return ABIArgInfo::getDirect(ResType); } return getNaturalAlignIndirect(Ty, /*ByVal=*/false); } ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty, const Type *Base, uint64_t Members) const { assert(Base && "Base class should be set for homogeneous aggregate"); // Base can be a floating-point or a vector. 
if (const VectorType *VT = Base->getAs()) { // FP16 vectors should be converted to integer vectors if (!getTarget().hasLegalHalfType() && containsAnyFP16Vectors(Ty)) { uint64_t Size = getContext().getTypeSize(VT); auto *NewVecTy = llvm::FixedVectorType::get( llvm::Type::getInt32Ty(getVMContext()), Size / 32); llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members); return ABIArgInfo::getDirect(Ty, 0, nullptr, false); } } unsigned Align = 0; if (getABIKind() == ARMABIKind::AAPCS || getABIKind() == ARMABIKind::AAPCS_VFP) { // For alignment adjusted HFAs, cap the argument alignment to 8, leave it // default otherwise. Align = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity(); Align = (Align > BaseAlign && Align >= 8) ? 8 : 0; } return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, Align); } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic, unsigned functionCallConv) const { // 6.1.2.1 The following argument types are VFP CPRCs: // A single-precision floating-point type (including promoted // half-precision types); A double-precision floating-point type; // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate // with a Base Type of a single- or double-precision floating-point type, // 64-bit containerized vectors or 128-bit containerized vectors with one // to four Elements. // Variadic functions should always marshal to the base standard. bool IsAAPCS_VFP = !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ false); Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. if (isIllegalVectorType(Ty)) return coerceIllegalVector(Ty); if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs()) { Ty = EnumTy->getDecl()->getIntegerType(); } if (const auto *EIT = Ty->getAs()) if (EIT->getNumBits() > 64) return getNaturalAlignIndirect(Ty, /*ByVal=*/true); return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty) : ABIArgInfo::getDirect()); } if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) { return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); } // Ignore empty records. if (isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); if (IsAAPCS_VFP) { // Homogeneous Aggregates need to be expanded when we can fit the aggregate // into VFP registers. const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) return classifyHomogeneousAggregate(Ty, Base, Members); } else if (getABIKind() == ARMABIKind::AAPCS16_VFP) { // WatchOS does have homogeneous aggregates. Note that we intentionally use // this convention even for a variadic function: the backend will use GPRs // if needed. const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(Ty, Base, Members)) { assert(Base && Members <= 4 && "unexpected homogeneous aggregate"); llvm::Type *Ty = llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members); return ABIArgInfo::getDirect(Ty, 0, nullptr, false); } } if (getABIKind() == ARMABIKind::AAPCS16_VFP && getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(16)) { // WatchOS is adopting the 64-bit AAPCS rule on composite types: if they're // bigger than 128-bits, they get placed in space allocated by the caller, // and a pointer is passed. 
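// --- Editor's sketch (not part of this patch): homogeneous aggregates as the
// AAPCS-VFP path above classifies them. Type and function names are
// illustrative. ---
struct Hfa4 { float a, b, c, d; };   // one FP base type, 4 members: an HFA,
                                     // passed/returned in VFP registers s0-s3
struct Mixed { float a; double b; }; // two base types: not an HFA, handled by
                                     // the ordinary composite rules
float sum_hfa(struct Hfa4 v);
double sum_mixed(struct Mixed v);
// --- end sketch ---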
return ABIArgInfo::getIndirect( CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), false); } // Support byval for ARM. // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at // most 8-byte. We realign the indirect argument if type alignment is bigger // than ABI alignment. uint64_t ABIAlign = 4; uint64_t TyAlign; if (getABIKind() == ARMABIKind::AAPCS_VFP || getABIKind() == ARMABIKind::AAPCS) { TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity(); ABIAlign = std::clamp(TyAlign, (uint64_t)4, (uint64_t)8); } else { TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity(); } if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { assert(getABIKind() != ARMABIKind::AAPCS16_VFP && "unexpected byval"); return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign), /*ByVal=*/true, /*Realign=*/TyAlign > ABIAlign); } // On RenderScript, coerce Aggregates <= 64 bytes to an integer array of // same size and alignment. if (getTarget().isRenderScriptTarget()) { return coerceToIntArray(Ty, getContext(), getVMContext()); } // Otherwise, pass by coercing to a structure of the appropriate size. llvm::Type* ElemTy; unsigned SizeRegs; // FIXME: Try to match the types of the arguments more accurately where // we can. if (TyAlign <= 4) { ElemTy = llvm::Type::getInt32Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32; } else { ElemTy = llvm::Type::getInt64Ty(getVMContext()); SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64; } return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs)); } static bool isIntegerLikeType(QualType Ty, ASTContext &Context, llvm::LLVMContext &VMContext) { // APCS, C Language Calling Conventions, Non-Simple Return Values: A structure // is called integer-like if its size is less than or equal to one word, and // the offset of each of its addressable sub-fields is zero. uint64_t Size = Context.getTypeSize(Ty); // Check that the type fits in a word. if (Size > 32) return false; // FIXME: Handle vector types! if (Ty->isVectorType()) return false; // Float types are never treated as "integer like". if (Ty->isRealFloatingType()) return false; // If this is a builtin or pointer type then it is ok. if (Ty->getAs() || Ty->isPointerType()) return true; // Small complex integer types are "integer like". if (const ComplexType *CT = Ty->getAs()) return isIntegerLikeType(CT->getElementType(), Context, VMContext); // Single element and zero sized arrays should be allowed, by the definition // above, but they are not. // Otherwise, it must be a record type. const RecordType *RT = Ty->getAs(); if (!RT) return false; // Ignore records with flexible arrays. const RecordDecl *RD = RT->getDecl(); if (RD->hasFlexibleArrayMember()) return false; // Check that all sub-fields are at offset 0, and are themselves "integer // like". const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); bool HadField = false; unsigned idx = 0; for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); i != e; ++i, ++idx) { const FieldDecl *FD = *i; // Bit-fields are not addressable, we only need to verify they are "integer // like". We still have to disallow a subsequent non-bitfield, for example: // struct { int : 0; int x } // is non-integer like according to gcc. if (FD->isBitField()) { if (!RD->isUnion()) HadField = true; if (!isIntegerLikeType(FD->getType(), Context, VMContext)) return false; continue; } // Check if this field is at offset 0. 
if (Layout.getFieldOffset(idx) != 0) return false; if (!isIntegerLikeType(FD->getType(), Context, VMContext)) return false; // Only allow at most one field in a structure. This doesn't match the // wording above, but follows gcc in situations with a field following an // empty structure. if (!RD->isUnion()) { if (HadField) return false; HadField = true; } } return true; } ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic, unsigned functionCallConv) const { // Variadic functions should always marshal to the base standard. bool IsAAPCS_VFP = !isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ true); if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); if (const VectorType *VT = RetTy->getAs()) { // Large vector types should be returned via memory. if (getContext().getTypeSize(RetTy) > 128) return getNaturalAlignIndirect(RetTy); // TODO: FP16/BF16 vectors should be converted to integer vectors // This check is similar to isIllegalVectorType - refactor? if ((!getTarget().hasLegalHalfType() && (VT->getElementType()->isFloat16Type() || VT->getElementType()->isHalfType())) || (IsFloatABISoftFP && VT->getElementType()->isBFloat16Type())) return coerceIllegalVector(RetTy); } if (!isAggregateTypeForABI(RetTy)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) RetTy = EnumTy->getDecl()->getIntegerType(); if (const auto *EIT = RetTy->getAs()) if (EIT->getNumBits() > 64) return getNaturalAlignIndirect(RetTy, /*ByVal=*/false); return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect(); } // Are we following APCS? if (getABIKind() == ARMABIKind::APCS) { if (isEmptyRecord(getContext(), RetTy, false)) return ABIArgInfo::getIgnore(); // Complex types are all returned as packed integers. // // FIXME: Consider using 2 x vector types if the back end handles them // correctly. if (RetTy->isAnyComplexType()) return ABIArgInfo::getDirect(llvm::IntegerType::get( getVMContext(), getContext().getTypeSize(RetTy))); // Integer like structures are returned in r0. if (isIntegerLikeType(RetTy, getContext(), getVMContext())) { // Return in the smallest viable integer type. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); } // Otherwise return in memory. return getNaturalAlignIndirect(RetTy); } // Otherwise this is an AAPCS variant. if (isEmptyRecord(getContext(), RetTy, true)) return ABIArgInfo::getIgnore(); // Check for homogeneous aggregates with AAPCS-VFP. if (IsAAPCS_VFP) { const Type *Base = nullptr; uint64_t Members = 0; if (isHomogeneousAggregate(RetTy, Base, Members)) return classifyHomogeneousAggregate(RetTy, Base, Members); } // Aggregates <= 4 bytes are returned in r0; other aggregates // are returned indirectly. uint64_t Size = getContext().getTypeSize(RetTy); if (Size <= 32) { // On RenderScript, coerce Aggregates <= 4 bytes to an integer array of // same size and alignment. if (getTarget().isRenderScriptTarget()) { return coerceToIntArray(RetTy, getContext(), getVMContext()); } if (getDataLayout().isBigEndian()) // Return in 32 bit integer integer type (as if loaded by LDR, AAPCS 5.4) return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); // Return in the smallest viable integer type. 
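// --- Editor's sketch (not part of this patch): the APCS "integer-like" test
// above applied to two small structs. Names are illustrative. ---
struct InR0  { unsigned short v; };    // one addressable field at offset 0,
                                       // <= 32 bits: integer-like, returned in r0
struct InMem { unsigned short a, b; }; // second field at a nonzero offset:
                                       // not integer-like, returned indirectly
struct InR0  make_in_r0(void);
struct InMem make_in_mem(void);
// --- end sketch ---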
if (Size <= 8) return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext())); if (Size <= 16) return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext())); return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext())); } else if (Size <= 128 && getABIKind() == ARMABIKind::AAPCS16_VFP) { llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext()); llvm::Type *CoerceTy = llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32); return ABIArgInfo::getDirect(CoerceTy); } return getNaturalAlignIndirect(RetTy); } /// isIllegalVector - check whether Ty is an illegal vector type. bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { if (const VectorType *VT = Ty->getAs ()) { // On targets that don't support half, fp16 or bfloat, they are expanded // into float, and we don't want the ABI to depend on whether or not they // are supported in hardware. Thus return false to coerce vectors of these // types into integer vectors. // We do not depend on hasLegalHalfType for bfloat as it is a // separate IR type. if ((!getTarget().hasLegalHalfType() && (VT->getElementType()->isFloat16Type() || VT->getElementType()->isHalfType())) || (IsFloatABISoftFP && VT->getElementType()->isBFloat16Type())) return true; if (isAndroid()) { // Android shipped using Clang 3.1, which supported a slightly different // vector ABI. The primary differences were that 3-element vector types // were legal, and so were sub 32-bit vectors (i.e. <2 x i8>). This path // accepts that legacy behavior for Android only. // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); // NumElements should be power of 2 or equal to 3. if (!llvm::isPowerOf2_32(NumElements) && NumElements != 3) return true; } else { // Check whether VT is legal. unsigned NumElements = VT->getNumElements(); uint64_t Size = getContext().getTypeSize(VT); // NumElements should be power of 2. if (!llvm::isPowerOf2_32(NumElements)) return true; // Size should be greater than 32 bits. return Size <= 32; } } return false; } /// Return true if a type contains any 16-bit floating point vectors bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const { if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) { uint64_t NElements = AT->getZExtSize(); if (NElements == 0) return false; return containsAnyFP16Vectors(AT->getElementType()); } else if (const RecordType *RT = Ty->getAs()) { const RecordDecl *RD = RT->getDecl(); // If this is a C++ record, check the bases first. if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) if (llvm::any_of(CXXRD->bases(), [this](const CXXBaseSpecifier &B) { return containsAnyFP16Vectors(B.getType()); })) return true; if (llvm::any_of(RD->fields(), [this](FieldDecl *FD) { return FD && containsAnyFP16Vectors(FD->getType()); })) return true; return false; } else { if (const VectorType *VT = Ty->getAs()) return (VT->getElementType()->isFloat16Type() || VT->getElementType()->isBFloat16Type() || VT->getElementType()->isHalfType()); return false; } } bool ARMSwiftABIInfo::isLegalVectorType(CharUnits VectorSize, llvm::Type *EltTy, unsigned NumElts) const { if (!llvm::isPowerOf2_32(NumElts)) return false; unsigned size = CGT.getDataLayout().getTypeStoreSizeInBits(EltTy); if (size > 64) return false; if (VectorSize.getQuantity() != 8 && (VectorSize.getQuantity() != 16 || NumElts == 1)) return false; return true; } bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS-VFP must have base types of float, // double, or 64-bit or 128-bit vectors. 
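// --- Editor's sketch (not part of this patch): the vector-legality rules above
// expressed with Clang's ext_vector_type extension. Typedef names are
// illustrative. ---
typedef float v4f32 __attribute__((ext_vector_type(4))); // 128 bits, power-of-two
                                                          // element count: legal
typedef int   v3i32 __attribute__((ext_vector_type(3))); // 3 elements: accepted only
                                                          // on the legacy Android path
typedef char  v2i8  __attribute__((ext_vector_type(2))); // 16 bits total: coerced,
                                                          // legal vectors must exceed 32 bits
// --- end sketch ---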
if (const BuiltinType *BT = Ty->getAs()) { if (BT->getKind() == BuiltinType::Float || BT->getKind() == BuiltinType::Double || BT->getKind() == BuiltinType::LongDouble) return true; } else if (const VectorType *VT = Ty->getAs()) { unsigned VecSize = getContext().getTypeSize(VT); if (VecSize == 64 || VecSize == 128) return true; } return false; } bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const { return Members <= 4; } bool ARMABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const { // AAPCS32 says that the rule for whether something is a homogeneous // aggregate is applied to the output of the data layout decision. So // anything that doesn't affect the data layout also does not affect // homogeneity. In particular, zero-length bitfields don't stop a struct // being homogeneous. return true; } bool ARMABIInfo::isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const { // Give precedence to user-specified calling conventions. if (callConvention != llvm::CallingConv::C) return (callConvention == llvm::CallingConv::ARM_AAPCS_VFP); else return (getABIKind() == ARMABIKind::AAPCS_VFP) || (acceptHalf && (getABIKind() == ARMABIKind::AAPCS16_VFP)); } RValue ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, AggValueSlot Slot) const { CharUnits SlotSize = CharUnits::fromQuantity(4); // Empty records are ignored for parameter passing purposes. if (isEmptyRecord(getContext(), Ty, true)) return Slot.asRValue(); CharUnits TySize = getContext().getTypeSizeInChars(Ty); CharUnits TyAlignForABI = getContext().getTypeUnadjustedAlignInChars(Ty); // Use indirect if size of the illegal vector is bigger than 16 bytes. bool IsIndirect = false; const Type *Base = nullptr; uint64_t Members = 0; if (TySize > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) { IsIndirect = true; // ARMv7k passes structs bigger than 16 bytes indirectly, in space // allocated by the caller. } else if (TySize > CharUnits::fromQuantity(16) && getABIKind() == ARMABIKind::AAPCS16_VFP && !isHomogeneousAggregate(Ty, Base, Members)) { IsIndirect = true; // Otherwise, bound the type's ABI alignment. // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte. // Our callers should be prepared to handle an under-aligned address. } else if (getABIKind() == ARMABIKind::AAPCS_VFP || getABIKind() == ARMABIKind::AAPCS) { TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(8)); } else if (getABIKind() == ARMABIKind::AAPCS16_VFP) { // ARMv7k allows type alignment up to 16 bytes. 
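// --- Editor's sketch (not part of this patch): the variadic path EmitVAArg above
// services. Each va_list slot is 4 bytes wide, and for AAPCS the alignment applied
// to the cursor is clamped to the 4..8 byte range, so callers must tolerate reads
// that are under-aligned relative to a type's natural alignment. Function name is
// illustrative. ---
#include <stdarg.h>
double sum_variadic(int count, ...) {
  va_list ap;
  va_start(ap, count);
  double total = 0.0;
  for (int i = 0; i < count; ++i)
    total += va_arg(ap, double); // 8-byte reads advanced over 4-byte slots
  va_end(ap);
  return total;
}
// --- end sketch ---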
TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4)); TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(16)); } else { TyAlignForABI = CharUnits::fromQuantity(4); } TypeInfoChars TyInfo(TySize, TyAlignForABI, AlignRequirementKind::None); return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo, SlotSize, /*AllowHigherAlign*/ true, Slot); } std::unique_ptr CodeGen::createARMTargetCodeGenInfo(CodeGenModule &CGM, ARMABIKind Kind) { return std::make_unique(CGM.getTypes(), Kind); } std::unique_ptr CodeGen::createWindowsARMTargetCodeGenInfo(CodeGenModule &CGM, ARMABIKind K) { return std::make_unique(CGM.getTypes(), K); } diff --git a/contrib/llvm-project/clang/lib/Driver/Driver.cpp b/contrib/llvm-project/clang/lib/Driver/Driver.cpp index 8e44d5afa40e..ecae475f75da 100644 --- a/contrib/llvm-project/clang/lib/Driver/Driver.cpp +++ b/contrib/llvm-project/clang/lib/Driver/Driver.cpp @@ -1,6916 +1,6919 @@ //===--- Driver.cpp - Clang GCC Compatible Driver -------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "clang/Driver/Driver.h" #include "ToolChains/AIX.h" #include "ToolChains/AMDGPU.h" #include "ToolChains/AMDGPUOpenMP.h" #include "ToolChains/AVR.h" #include "ToolChains/Arch/RISCV.h" #include "ToolChains/BareMetal.h" #include "ToolChains/CSKYToolChain.h" #include "ToolChains/Clang.h" #include "ToolChains/CrossWindows.h" #include "ToolChains/Cuda.h" #include "ToolChains/Darwin.h" #include "ToolChains/DragonFly.h" #include "ToolChains/FreeBSD.h" #include "ToolChains/Fuchsia.h" #include "ToolChains/Gnu.h" #include "ToolChains/HIPAMD.h" #include "ToolChains/HIPSPV.h" #include "ToolChains/HLSL.h" #include "ToolChains/Haiku.h" #include "ToolChains/Hexagon.h" #include "ToolChains/Hurd.h" #include "ToolChains/Lanai.h" #include "ToolChains/Linux.h" #include "ToolChains/MSP430.h" #include "ToolChains/MSVC.h" #include "ToolChains/MinGW.h" #include "ToolChains/MipsLinux.h" #include "ToolChains/NaCl.h" #include "ToolChains/NetBSD.h" #include "ToolChains/OHOS.h" #include "ToolChains/OpenBSD.h" #include "ToolChains/PPCFreeBSD.h" #include "ToolChains/PPCLinux.h" #include "ToolChains/PS4CPU.h" #include "ToolChains/RISCVToolchain.h" #include "ToolChains/SPIRV.h" #include "ToolChains/Solaris.h" #include "ToolChains/TCE.h" #include "ToolChains/VEToolchain.h" #include "ToolChains/WebAssembly.h" #include "ToolChains/XCore.h" #include "ToolChains/ZOS.h" #include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/TargetID.h" #include "clang/Basic/Version.h" #include "clang/Config/config.h" #include "clang/Driver/Action.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/InputInfo.h" #include "clang/Driver/Job.h" #include "clang/Driver/Options.h" #include "clang/Driver/Phases.h" #include "clang/Driver/SanitizerArgs.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" #include "clang/Driver/Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Config/llvm-config.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include 
"llvm/Option/OptSpecifier.h" #include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ExitCodes.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" #include "llvm/Support/Regex.h" #include "llvm/Support/StringSaver.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Host.h" #include "llvm/TargetParser/RISCVISAInfo.h" #include // ::getenv #include #include #include #include #include #if LLVM_ON_UNIX #include // getpid #endif using namespace clang::driver; using namespace clang; using namespace llvm::opt; static std::optional getOffloadTargetTriple(const Driver &D, const ArgList &Args) { auto OffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ); // Offload compilation flow does not support multiple targets for now. We // need the HIPActionBuilder (and possibly the CudaActionBuilder{,Base}too) // to support multiple tool chains first. switch (OffloadTargets.size()) { default: D.Diag(diag::err_drv_only_one_offload_target_supported); return std::nullopt; case 0: D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << ""; return std::nullopt; case 1: break; } return llvm::Triple(OffloadTargets[0]); } static std::optional getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args, const llvm::Triple &HostTriple) { if (!Args.hasArg(options::OPT_offload_EQ)) { return llvm::Triple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda"); } auto TT = getOffloadTargetTriple(D, Args); if (TT && (TT->getArch() == llvm::Triple::spirv32 || TT->getArch() == llvm::Triple::spirv64)) { if (Args.hasArg(options::OPT_emit_llvm)) return TT; D.Diag(diag::err_drv_cuda_offload_only_emit_bc); return std::nullopt; } D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); return std::nullopt; } static std::optional getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { if (!Args.hasArg(options::OPT_offload_EQ)) { auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ); if (llvm::find(OffloadArchs, "amdgcnspirv") != OffloadArchs.cend()) { if (OffloadArchs.size() == 1) return llvm::Triple("spirv64-amd-amdhsa"); // Mixing specific & SPIR-V compilation is not supported for now. D.Diag(diag::err_drv_only_one_offload_target_supported); return std::nullopt; } return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple. } auto TT = getOffloadTargetTriple(D, Args); if (!TT) return std::nullopt; if (TT->getArch() == llvm::Triple::amdgcn && TT->getVendor() == llvm::Triple::AMD && TT->getOS() == llvm::Triple::AMDHSA) return TT; if (TT->getArch() == llvm::Triple::spirv64) return TT; D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); return std::nullopt; } // static std::string Driver::GetResourcesPath(StringRef BinaryPath, StringRef CustomResourceDir) { // Since the resource directory is embedded in the module hash, it's important // that all places that need it call this function, so that they get the // exact same string ("a/../b/" and "b/" get different hashes, for example). // Dir is bin/ or lib/, depending on where BinaryPath is. 
std::string Dir = std::string(llvm::sys::path::parent_path(BinaryPath)); SmallString<128> P(Dir); if (CustomResourceDir != "") { llvm::sys::path::append(P, CustomResourceDir); } else { // On Windows, libclang.dll is in bin/. // On non-Windows, libclang.so/.dylib is in lib/. // With a static-library build of libclang, LibClangPath will contain the // path of the embedding binary, which for LLVM binaries will be in bin/. // ../lib gets us to lib/ in both cases. P = llvm::sys::path::parent_path(Dir); // This search path is also created in the COFF driver of lld, so any // changes here also needs to happen in lld/COFF/Driver.cpp llvm::sys::path::append(P, CLANG_INSTALL_LIBDIR_BASENAME, "clang", CLANG_VERSION_MAJOR_STRING); } return std::string(P); } Driver::Driver(StringRef ClangExecutable, StringRef TargetTriple, DiagnosticsEngine &Diags, std::string Title, IntrusiveRefCntPtr VFS) : Diags(Diags), VFS(std::move(VFS)), Mode(GCCMode), SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone), Offload(OffloadHostDevice), CXX20HeaderType(HeaderMode_None), ModulesModeCXX20(false), LTOMode(LTOK_None), ClangExecutable(ClangExecutable), SysRoot(DEFAULT_SYSROOT), DriverTitle(Title), CCCPrintBindings(false), CCPrintOptions(false), CCLogDiagnostics(false), CCGenDiagnostics(false), CCPrintProcessStats(false), CCPrintInternalStats(false), TargetTriple(TargetTriple), Saver(Alloc), PrependArg(nullptr), CheckInputsExist(true), ProbePrecompiled(true), SuppressMissingInputWarning(false) { // Provide a sane fallback if no VFS is specified. if (!this->VFS) this->VFS = llvm::vfs::getRealFileSystem(); Name = std::string(llvm::sys::path::filename(ClangExecutable)); Dir = std::string(llvm::sys::path::parent_path(ClangExecutable)); if ((!SysRoot.empty()) && llvm::sys::path::is_relative(SysRoot)) { // Prepend InstalledDir if SysRoot is relative SmallString<128> P(Dir); llvm::sys::path::append(P, SysRoot); SysRoot = std::string(P); } #if defined(CLANG_CONFIG_FILE_SYSTEM_DIR) SystemConfigDir = CLANG_CONFIG_FILE_SYSTEM_DIR; #endif #if defined(CLANG_CONFIG_FILE_USER_DIR) { SmallString<128> P; llvm::sys::fs::expand_tilde(CLANG_CONFIG_FILE_USER_DIR, P); UserConfigDir = static_cast(P); } #endif // Compute the path to the resource directory. ResourceDir = GetResourcesPath(ClangExecutable, CLANG_RESOURCE_DIR); } void Driver::setDriverMode(StringRef Value) { static StringRef OptName = getOpts().getOption(options::OPT_driver_mode).getPrefixedName(); if (auto M = llvm::StringSwitch>(Value) .Case("gcc", GCCMode) .Case("g++", GXXMode) .Case("cpp", CPPMode) .Case("cl", CLMode) .Case("flang", FlangMode) .Case("dxc", DXCMode) .Default(std::nullopt)) Mode = *M; else Diag(diag::err_drv_unsupported_option_argument) << OptName << Value; } InputArgList Driver::ParseArgStrings(ArrayRef ArgStrings, bool UseDriverMode, bool &ContainsError) { llvm::PrettyStackTraceString CrashInfo("Command line argument parsing"); ContainsError = false; llvm::opt::Visibility VisibilityMask = getOptionVisibilityMask(UseDriverMode); unsigned MissingArgIndex, MissingArgCount; InputArgList Args = getOpts().ParseArgs(ArgStrings, MissingArgIndex, MissingArgCount, VisibilityMask); // Check for missing argument error. if (MissingArgCount) { Diag(diag::err_drv_missing_argument) << Args.getArgString(MissingArgIndex) << MissingArgCount; ContainsError |= Diags.getDiagnosticLevel(diag::err_drv_missing_argument, SourceLocation()) > DiagnosticsEngine::Warning; } // Check for unsupported options. 
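// --- Editor's sketch (not part of this patch): the path arithmetic performed by
// Driver::GetResourcesPath above, in isolation. The literal "lib" stands in for
// CLANG_INSTALL_LIBDIR_BASENAME and "19" for the clang major version; both are
// assumptions for illustration only. ---
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"
#include <string>
static std::string defaultResourceDir(llvm::StringRef BinaryPath) {
  // <prefix>/bin/clang -> <prefix>/lib/clang/<major>
  std::string BinDir = std::string(llvm::sys::path::parent_path(BinaryPath));
  llvm::SmallString<128> P(llvm::sys::path::parent_path(BinDir));
  llvm::sys::path::append(P, "lib", "clang", "19");
  return std::string(P);
}
// --- end sketch ---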
for (const Arg *A : Args) { if (A->getOption().hasFlag(options::Unsupported)) { Diag(diag::err_drv_unsupported_opt) << A->getAsString(Args); ContainsError |= Diags.getDiagnosticLevel(diag::err_drv_unsupported_opt, SourceLocation()) > DiagnosticsEngine::Warning; continue; } // Warn about -mcpu= without an argument. if (A->getOption().matches(options::OPT_mcpu_EQ) && A->containsValue("")) { Diag(diag::warn_drv_empty_joined_argument) << A->getAsString(Args); ContainsError |= Diags.getDiagnosticLevel( diag::warn_drv_empty_joined_argument, SourceLocation()) > DiagnosticsEngine::Warning; } } for (const Arg *A : Args.filtered(options::OPT_UNKNOWN)) { unsigned DiagID; auto ArgString = A->getAsString(Args); std::string Nearest; if (getOpts().findNearest(ArgString, Nearest, VisibilityMask) > 1) { if (!IsCLMode() && getOpts().findExact(ArgString, Nearest, llvm::opt::Visibility(options::CC1Option))) { DiagID = diag::err_drv_unknown_argument_with_suggestion; Diags.Report(DiagID) << ArgString << "-Xclang " + Nearest; } else { DiagID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl : diag::err_drv_unknown_argument; Diags.Report(DiagID) << ArgString; } } else { DiagID = IsCLMode() ? diag::warn_drv_unknown_argument_clang_cl_with_suggestion : diag::err_drv_unknown_argument_with_suggestion; Diags.Report(DiagID) << ArgString << Nearest; } ContainsError |= Diags.getDiagnosticLevel(DiagID, SourceLocation()) > DiagnosticsEngine::Warning; } for (const Arg *A : Args.filtered(options::OPT_o)) { if (ArgStrings[A->getIndex()] == A->getSpelling()) continue; // Warn on joined arguments that are similar to a long argument. std::string ArgString = ArgStrings[A->getIndex()]; std::string Nearest; if (getOpts().findExact("-" + ArgString, Nearest, VisibilityMask)) Diags.Report(diag::warn_drv_potentially_misspelled_joined_argument) << A->getAsString(Args) << Nearest; } return Args; } // Determine which compilation mode we are in. We look for options which // affect the phase, starting with the earliest phases, and record which // option we used to determine the final phase. phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, Arg **FinalPhaseArg) const { Arg *PhaseArg = nullptr; phases::ID FinalPhase; // -{E,EP,P,M,MM} only run the preprocessor. if (CCCIsCPP() || (PhaseArg = DAL.getLastArg(options::OPT_E)) || (PhaseArg = DAL.getLastArg(options::OPT__SLASH_EP)) || (PhaseArg = DAL.getLastArg(options::OPT_M, options::OPT_MM)) || (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P)) || CCGenDiagnostics) { FinalPhase = phases::Preprocess; // --precompile only runs up to precompilation. // Options that cause the output of C++20 compiled module interfaces or // header units have the same effect. } else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile)) || (PhaseArg = DAL.getLastArg(options::OPT_extract_api)) || (PhaseArg = DAL.getLastArg(options::OPT_fmodule_header, options::OPT_fmodule_header_EQ))) { FinalPhase = phases::Precompile; // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. 
} else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) || (PhaseArg = DAL.getLastArg(options::OPT_print_supported_cpus)) || (PhaseArg = DAL.getLastArg(options::OPT_print_enabled_extensions)) || (PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) || (PhaseArg = DAL.getLastArg(options::OPT_verify_pch)) || (PhaseArg = DAL.getLastArg(options::OPT_rewrite_objc)) || (PhaseArg = DAL.getLastArg(options::OPT_rewrite_legacy_objc)) || (PhaseArg = DAL.getLastArg(options::OPT__migrate)) || (PhaseArg = DAL.getLastArg(options::OPT__analyze)) || (PhaseArg = DAL.getLastArg(options::OPT_emit_cir)) || (PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) { FinalPhase = phases::Compile; // -S only runs up to the backend. } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) { FinalPhase = phases::Backend; // -c compilation only runs up to the assembler. } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) { FinalPhase = phases::Assemble; } else if ((PhaseArg = DAL.getLastArg(options::OPT_emit_interface_stubs))) { FinalPhase = phases::IfsMerge; // Otherwise do everything. } else FinalPhase = phases::Link; if (FinalPhaseArg) *FinalPhaseArg = PhaseArg; return FinalPhase; } static Arg *MakeInputArg(DerivedArgList &Args, const OptTable &Opts, StringRef Value, bool Claim = true) { Arg *A = new Arg(Opts.getOption(options::OPT_INPUT), Value, Args.getBaseArgs().MakeIndex(Value), Value.data()); Args.AddSynthesizedArg(A); if (Claim) A->claim(); return A; } DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const { const llvm::opt::OptTable &Opts = getOpts(); DerivedArgList *DAL = new DerivedArgList(Args); bool HasNostdlib = Args.hasArg(options::OPT_nostdlib); bool HasNostdlibxx = Args.hasArg(options::OPT_nostdlibxx); bool HasNodefaultlib = Args.hasArg(options::OPT_nodefaultlibs); bool IgnoreUnused = false; for (Arg *A : Args) { if (IgnoreUnused) A->claim(); if (A->getOption().matches(options::OPT_start_no_unused_arguments)) { IgnoreUnused = true; continue; } if (A->getOption().matches(options::OPT_end_no_unused_arguments)) { IgnoreUnused = false; continue; } // Unfortunately, we have to parse some forwarding options (-Xassembler, // -Xlinker, -Xpreprocessor) because we either integrate their functionality // (assembler and preprocessor), or bypass a previous driver ('collect2'). // Rewrite linker options, to replace --no-demangle with a custom internal // option. if ((A->getOption().matches(options::OPT_Wl_COMMA) || A->getOption().matches(options::OPT_Xlinker)) && A->containsValue("--no-demangle")) { // Add the rewritten no-demangle argument. DAL->AddFlagArg(A, Opts.getOption(options::OPT_Z_Xlinker__no_demangle)); // Add the remaining values as Xlinker arguments. for (StringRef Val : A->getValues()) if (Val != "--no-demangle") DAL->AddSeparateArg(A, Opts.getOption(options::OPT_Xlinker), Val); continue; } // Rewrite preprocessor options, to replace -Wp,-MD,FOO which is used by // some build systems. We don't try to be complete here because we don't // care to encourage this usage model. if (A->getOption().matches(options::OPT_Wp_COMMA) && (A->getValue(0) == StringRef("-MD") || A->getValue(0) == StringRef("-MMD"))) { // Rewrite to -MD/-MMD along with -MF. if (A->getValue(0) == StringRef("-MD")) DAL->AddFlagArg(A, Opts.getOption(options::OPT_MD)); else DAL->AddFlagArg(A, Opts.getOption(options::OPT_MMD)); if (A->getNumValues() == 2) DAL->AddSeparateArg(A, Opts.getOption(options::OPT_MF), A->getValue(1)); continue; } // Rewrite reserved library names. 
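// --- Editor's note (not part of this patch): how the phase selection above maps
// a few representative invocations, assuming no other phase-limiting flags are
// present:
//   clang -E x.c              -> Preprocess
//   clang --precompile x.cppm -> Precompile
//   clang -fsyntax-only x.c   -> Compile
//   clang -S x.c              -> Backend
//   clang -c x.c              -> Assemble
//   clang x.c                 -> Link
// --- end note ---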
if (A->getOption().matches(options::OPT_l)) { StringRef Value = A->getValue(); // Rewrite unless -nostdlib is present. if (!HasNostdlib && !HasNodefaultlib && !HasNostdlibxx && Value == "stdc++") { DAL->AddFlagArg(A, Opts.getOption(options::OPT_Z_reserved_lib_stdcxx)); continue; } // Rewrite unconditionally. if (Value == "cc_kext") { DAL->AddFlagArg(A, Opts.getOption(options::OPT_Z_reserved_lib_cckext)); continue; } } // Pick up inputs via the -- option. if (A->getOption().matches(options::OPT__DASH_DASH)) { A->claim(); for (StringRef Val : A->getValues()) DAL->append(MakeInputArg(*DAL, Opts, Val, false)); continue; } DAL->append(A); } // DXC mode quits before assembly if an output object file isn't specified. if (IsDXCMode() && !Args.hasArg(options::OPT_dxc_Fo)) DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_S)); // Enforce -static if -miamcu is present. if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) DAL->AddFlagArg(nullptr, Opts.getOption(options::OPT_static)); // Add a default value of -mlinker-version=, if one was given and the user // didn't specify one. #if defined(HOST_LINK_VERSION) if (!Args.hasArg(options::OPT_mlinker_version_EQ) && strlen(HOST_LINK_VERSION) > 0) { DAL->AddJoinedArg(0, Opts.getOption(options::OPT_mlinker_version_EQ), HOST_LINK_VERSION); DAL->getLastArg(options::OPT_mlinker_version_EQ)->claim(); } #endif return DAL; } /// Compute target triple from args. /// /// This routine provides the logic to compute a target triple from various /// args passed to the driver and the default triple string. static llvm::Triple computeTargetTriple(const Driver &D, StringRef TargetTriple, const ArgList &Args, StringRef DarwinArchName = "") { // FIXME: Already done in Compilation *Driver::BuildCompilation if (const Arg *A = Args.getLastArg(options::OPT_target)) TargetTriple = A->getValue(); llvm::Triple Target(llvm::Triple::normalize(TargetTriple)); // GNU/Hurd's triples should have been -hurd-gnu*, but were historically made // -gnu* only, and we can not change this, so we have to detect that case as // being the Hurd OS. if (TargetTriple.contains("-unknown-gnu") || TargetTriple.contains("-pc-gnu")) Target.setOSName("hurd"); // Handle Apple-specific options available here. if (Target.isOSBinFormatMachO()) { // If an explicit Darwin arch name is given, that trumps all. if (!DarwinArchName.empty()) { tools::darwin::setTripleTypeForMachOArchName(Target, DarwinArchName, Args); return Target; } // Handle the Darwin '-arch' flag. if (Arg *A = Args.getLastArg(options::OPT_arch)) { StringRef ArchName = A->getValue(); tools::darwin::setTripleTypeForMachOArchName(Target, ArchName, Args); } } // Handle pseudo-target flags '-mlittle-endian'/'-EL' and // '-mbig-endian'/'-EB'. if (Arg *A = Args.getLastArgNoClaim(options::OPT_mlittle_endian, options::OPT_mbig_endian)) { llvm::Triple T = A->getOption().matches(options::OPT_mlittle_endian) ? Target.getLittleEndianArchVariant() : Target.getBigEndianArchVariant(); if (T.getArch() != llvm::Triple::UnknownArch) { Target = std::move(T); Args.claimAllArgs(options::OPT_mlittle_endian, options::OPT_mbig_endian); } } // Skip further flag support on OSes which don't support '-m32' or '-m64'. if (Target.getArch() == llvm::Triple::tce) return Target; // On AIX, the env OBJECT_MODE may affect the resulting arch variant. 
  if (Target.isOSAIX()) {
    if (std::optional<std::string> ObjectModeValue =
            llvm::sys::Process::GetEnv("OBJECT_MODE")) {
      StringRef ObjectMode = *ObjectModeValue;
      llvm::Triple::ArchType AT = llvm::Triple::UnknownArch;

      if (ObjectMode == "64") {
        AT = Target.get64BitArchVariant().getArch();
      } else if (ObjectMode == "32") {
        AT = Target.get32BitArchVariant().getArch();
      } else {
        D.Diag(diag::err_drv_invalid_object_mode) << ObjectMode;
      }

      if (AT != llvm::Triple::UnknownArch && AT != Target.getArch())
        Target.setArch(AT);
    }
  }

  // The `-maix[32|64]` flags are only valid for AIX targets.
  if (Arg *A = Args.getLastArgNoClaim(options::OPT_maix32, options::OPT_maix64);
      A && !Target.isOSAIX())
    D.Diag(diag::err_drv_unsupported_opt_for_target)
        << A->getAsString(Args) << Target.str();

  // Handle pseudo-target flags '-m64', '-mx32', '-m32' and '-m16'.
  Arg *A = Args.getLastArg(options::OPT_m64, options::OPT_mx32,
                           options::OPT_m32, options::OPT_m16,
                           options::OPT_maix32, options::OPT_maix64);
  if (A) {
    llvm::Triple::ArchType AT = llvm::Triple::UnknownArch;

    if (A->getOption().matches(options::OPT_m64) ||
        A->getOption().matches(options::OPT_maix64)) {
      AT = Target.get64BitArchVariant().getArch();
-      if (Target.getEnvironment() == llvm::Triple::GNUX32)
+      if (Target.getEnvironment() == llvm::Triple::GNUX32 ||
+          Target.getEnvironment() == llvm::Triple::GNUT64)
        Target.setEnvironment(llvm::Triple::GNU);
      else if (Target.getEnvironment() == llvm::Triple::MuslX32)
        Target.setEnvironment(llvm::Triple::Musl);
    } else if (A->getOption().matches(options::OPT_mx32) &&
               Target.get64BitArchVariant().getArch() == llvm::Triple::x86_64) {
      AT = llvm::Triple::x86_64;
      if (Target.getEnvironment() == llvm::Triple::Musl)
        Target.setEnvironment(llvm::Triple::MuslX32);
      else
        Target.setEnvironment(llvm::Triple::GNUX32);
    } else if (A->getOption().matches(options::OPT_m32) ||
               A->getOption().matches(options::OPT_maix32)) {
      AT = Target.get32BitArchVariant().getArch();
      if (Target.getEnvironment() == llvm::Triple::GNUX32)
        Target.setEnvironment(llvm::Triple::GNU);
      else if (Target.getEnvironment() == llvm::Triple::MuslX32)
        Target.setEnvironment(llvm::Triple::Musl);
    } else if (A->getOption().matches(options::OPT_m16) &&
               Target.get32BitArchVariant().getArch() == llvm::Triple::x86) {
      AT = llvm::Triple::x86;
      Target.setEnvironment(llvm::Triple::CODE16);
    }

    if (AT != llvm::Triple::UnknownArch && AT != Target.getArch()) {
      Target.setArch(AT);
      if (Target.isWindowsGNUEnvironment())
        toolchains::MinGW::fixTripleArch(D, Target, Args);
    }
  }

  // Handle -miamcu flag.
  if (Args.hasFlag(options::OPT_miamcu, options::OPT_mno_iamcu, false)) {
    if (Target.get32BitArchVariant().getArch() != llvm::Triple::x86)
      D.Diag(diag::err_drv_unsupported_opt_for_target)
          << "-miamcu" << Target.str();

    if (A && !A->getOption().matches(options::OPT_m32))
      D.Diag(diag::err_drv_argument_not_allowed_with)
          << "-miamcu" << A->getBaseArg().getAsString(Args);

    Target.setArch(llvm::Triple::x86);
    Target.setArchName("i586");
    Target.setEnvironment(llvm::Triple::UnknownEnvironment);
    Target.setEnvironmentName("");
    Target.setOS(llvm::Triple::ELFIAMCU);
    Target.setVendor(llvm::Triple::UnknownVendor);
    Target.setVendorName("intel");
  }

  // If the target is MIPS, adjust the target triple
  // according to the provided ABI name.
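  // For example, "--target=mips64-linux-gnu -mabi=n32" is remapped below to
  // the gnuabin32 environment, while "-mabi=32" drops a 64-bit MIPS triple
  // back to its 32-bit variant (the triples shown are illustrative).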
  if (Target.isMIPS()) {
    if ((A = Args.getLastArg(options::OPT_mabi_EQ))) {
      StringRef ABIName = A->getValue();
      if (ABIName == "32") {
        Target = Target.get32BitArchVariant();
        if (Target.getEnvironment() == llvm::Triple::GNUABI64 ||
            Target.getEnvironment() == llvm::Triple::GNUABIN32)
          Target.setEnvironment(llvm::Triple::GNU);
      } else if (ABIName == "n32") {
        Target = Target.get64BitArchVariant();
        if (Target.getEnvironment() == llvm::Triple::GNU ||
+            Target.getEnvironment() == llvm::Triple::GNUT64 ||
            Target.getEnvironment() == llvm::Triple::GNUABI64)
          Target.setEnvironment(llvm::Triple::GNUABIN32);
      } else if (ABIName == "64") {
        Target = Target.get64BitArchVariant();
        if (Target.getEnvironment() == llvm::Triple::GNU ||
+            Target.getEnvironment() == llvm::Triple::GNUT64 ||
            Target.getEnvironment() == llvm::Triple::GNUABIN32)
          Target.setEnvironment(llvm::Triple::GNUABI64);
      }
    }
  }

  // If the target is RISC-V, adjust the target triple according to the
  // provided architecture name.
  if (Target.isRISCV()) {
    if (Args.hasArg(options::OPT_march_EQ) ||
        Args.hasArg(options::OPT_mcpu_EQ)) {
      std::string ArchName = tools::riscv::getRISCVArch(Args, Target);
      auto ISAInfo = llvm::RISCVISAInfo::parseArchString(
          ArchName, /*EnableExperimentalExtensions=*/true);
      if (!llvm::errorToBool(ISAInfo.takeError())) {
        unsigned XLen = (*ISAInfo)->getXLen();
        if (XLen == 32)
          Target.setArch(llvm::Triple::riscv32);
        else if (XLen == 64)
          Target.setArch(llvm::Triple::riscv64);
      }
    }
  }

  return Target;
}

// Parse the LTO options and record the type of LTO compilation
// based on which -f(no-)?lto(=.*)? or -f(no-)?offload-lto(=.*)?
// option occurs last.
static driver::LTOKind parseLTOMode(Driver &D, const llvm::opt::ArgList &Args,
                                    OptSpecifier OptEq, OptSpecifier OptNeg) {
  if (!Args.hasFlag(OptEq, OptNeg, false))
    return LTOK_None;

  const Arg *A = Args.getLastArg(OptEq);
  StringRef LTOName = A->getValue();

  driver::LTOKind LTOMode = llvm::StringSwitch<driver::LTOKind>(LTOName)
                                .Case("full", LTOK_Full)
                                .Case("thin", LTOK_Thin)
                                .Default(LTOK_Unknown);

  if (LTOMode == LTOK_Unknown) {
    D.Diag(diag::err_drv_unsupported_option_argument)
        << A->getSpelling() << A->getValue();
    return LTOK_None;
  }
  return LTOMode;
}

// Parse the LTO options.
void Driver::setLTOMode(const llvm::opt::ArgList &Args) {
  LTOMode =
      parseLTOMode(*this, Args, options::OPT_flto_EQ, options::OPT_fno_lto);

  OffloadLTOMode = parseLTOMode(*this, Args, options::OPT_foffload_lto_EQ,
                                options::OPT_fno_offload_lto);

  // Try to enable `-foffload-lto=full` if `-fopenmp-target-jit` is on.
  if (Args.hasFlag(options::OPT_fopenmp_target_jit,
                   options::OPT_fno_openmp_target_jit, false)) {
    if (Arg *A = Args.getLastArg(options::OPT_foffload_lto_EQ,
                                 options::OPT_fno_offload_lto))
      if (OffloadLTOMode != LTOK_Full)
        Diag(diag::err_drv_incompatible_options)
            << A->getSpelling() << "-fopenmp-target-jit";
    OffloadLTOMode = LTOK_Full;
  }
}

/// Compute the desired OpenMP runtime from the flags provided.
Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const {
  StringRef RuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME);

  const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ);
  if (A)
    RuntimeName = A->getValue();

  auto RT = llvm::StringSwitch<OpenMPRuntimeKind>(RuntimeName)
                .Case("libomp", OMPRT_OMP)
                .Case("libgomp", OMPRT_GOMP)
                .Case("libiomp5", OMPRT_IOMP5)
                .Default(OMPRT_Unknown);

  if (RT == OMPRT_Unknown) {
    if (A)
      Diag(diag::err_drv_unsupported_option_argument)
          << A->getSpelling() << A->getValue();
    else
      // FIXME: We could use a nicer diagnostic here.
Diag(diag::err_drv_unsupported_opt) << "-fopenmp"; } return RT; } void Driver::CreateOffloadingDeviceToolChains(Compilation &C, InputList &Inputs) { // // CUDA/HIP // // We need to generate a CUDA/HIP toolchain if any of the inputs has a CUDA // or HIP type. However, mixed CUDA/HIP compilation is not supported. bool IsCuda = llvm::any_of(Inputs, [](std::pair &I) { return types::isCuda(I.first); }); bool IsHIP = llvm::any_of(Inputs, [](std::pair &I) { return types::isHIP(I.first); }) || C.getInputArgs().hasArg(options::OPT_hip_link) || C.getInputArgs().hasArg(options::OPT_hipstdpar); if (IsCuda && IsHIP) { Diag(clang::diag::err_drv_mix_cuda_hip); return; } if (IsCuda) { const ToolChain *HostTC = C.getSingleOffloadToolChain(); const llvm::Triple &HostTriple = HostTC->getTriple(); auto OFK = Action::OFK_Cuda; auto CudaTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), HostTriple); if (!CudaTriple) return; // Use the CUDA and host triples as the key into the ToolChains map, // because the device toolchain we create depends on both. auto &CudaTC = ToolChains[CudaTriple->str() + "/" + HostTriple.str()]; if (!CudaTC) { CudaTC = std::make_unique( *this, *CudaTriple, *HostTC, C.getInputArgs()); // Emit a warning if the detected CUDA version is too new. CudaInstallationDetector &CudaInstallation = static_cast(*CudaTC).CudaInstallation; if (CudaInstallation.isValid()) CudaInstallation.WarnIfUnsupportedVersion(); } C.addOffloadDeviceToolChain(CudaTC.get(), OFK); } else if (IsHIP) { if (auto *OMPTargetArg = C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) { Diag(clang::diag::err_drv_unsupported_opt_for_language_mode) << OMPTargetArg->getSpelling() << "HIP"; return; } const ToolChain *HostTC = C.getSingleOffloadToolChain(); auto OFK = Action::OFK_HIP; auto HIPTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); if (!HIPTriple) return; auto *HIPTC = &getOffloadingDeviceToolChain(C.getInputArgs(), *HIPTriple, *HostTC, OFK); assert(HIPTC && "Could not create offloading device tool chain."); C.addOffloadDeviceToolChain(HIPTC, OFK); } // // OpenMP // // We need to generate an OpenMP toolchain if the user specified targets with // the -fopenmp-targets option or used --offload-arch with OpenMP enabled. bool IsOpenMPOffloading = C.getInputArgs().hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false) && (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ) || C.getInputArgs().hasArg(options::OPT_offload_arch_EQ)); if (IsOpenMPOffloading) { // We expect that -fopenmp-targets is always used in conjunction with the // option -fopenmp specifying a valid runtime with offloading support, i.e. // libomp or libiomp. OpenMPRuntimeKind RuntimeKind = getOpenMPRuntime(C.getInputArgs()); if (RuntimeKind != OMPRT_OMP && RuntimeKind != OMPRT_IOMP5) { Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); return; } llvm::StringMap> DerivedArchs; llvm::StringMap FoundNormalizedTriples; std::multiset OpenMPTriples; // If the user specified -fopenmp-targets= we create a toolchain for each // valid triple. Otherwise, if only --offload-arch= was specified we instead // attempt to derive the appropriate toolchains from the arguments. 
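  // For example, "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda" names the
  // device triple explicitly, whereas "-fopenmp --offload-arch=sm_70" relies
  // on the deduction below to infer an NVPTX (or AMDGPU) triple from the
  // architecture name (the flags shown are illustrative).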
if (Arg *OpenMPTargets = C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) { if (OpenMPTargets && !OpenMPTargets->getNumValues()) { Diag(clang::diag::warn_drv_empty_joined_argument) << OpenMPTargets->getAsString(C.getInputArgs()); return; } for (StringRef T : OpenMPTargets->getValues()) OpenMPTriples.insert(T); } else if (C.getInputArgs().hasArg(options::OPT_offload_arch_EQ) && !IsHIP && !IsCuda) { const ToolChain *HostTC = C.getSingleOffloadToolChain(); auto AMDTriple = getHIPOffloadTargetTriple(*this, C.getInputArgs()); auto NVPTXTriple = getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), HostTC->getTriple()); // Attempt to deduce the offloading triple from the set of architectures. // We can only correctly deduce NVPTX / AMDGPU triples currently. We need // to temporarily create these toolchains so that we can access tools for // inferring architectures. llvm::DenseSet Archs; if (NVPTXTriple) { auto TempTC = std::make_unique( *this, *NVPTXTriple, *HostTC, C.getInputArgs()); for (StringRef Arch : getOffloadArchs( C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true)) Archs.insert(Arch); } if (AMDTriple) { auto TempTC = std::make_unique( *this, *AMDTriple, *HostTC, C.getInputArgs()); for (StringRef Arch : getOffloadArchs( C, C.getArgs(), Action::OFK_OpenMP, &*TempTC, true)) Archs.insert(Arch); } if (!AMDTriple && !NVPTXTriple) { for (StringRef Arch : getOffloadArchs(C, C.getArgs(), Action::OFK_OpenMP, nullptr, true)) Archs.insert(Arch); } for (StringRef Arch : Archs) { if (NVPTXTriple && IsNVIDIAOffloadArch(StringToOffloadArch( getProcessorFromTargetID(*NVPTXTriple, Arch)))) { DerivedArchs[NVPTXTriple->getTriple()].insert(Arch); } else if (AMDTriple && IsAMDOffloadArch(StringToOffloadArch( getProcessorFromTargetID(*AMDTriple, Arch)))) { DerivedArchs[AMDTriple->getTriple()].insert(Arch); } else { Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << Arch; return; } } // If the set is empty then we failed to find a native architecture. if (Archs.empty()) { Diag(clang::diag::err_drv_failed_to_deduce_target_from_arch) << "native"; return; } for (const auto &TripleAndArchs : DerivedArchs) OpenMPTriples.insert(TripleAndArchs.first()); } for (StringRef Val : OpenMPTriples) { llvm::Triple TT(ToolChain::getOpenMPTriple(Val)); std::string NormalizedName = TT.normalize(); // Make sure we don't have a duplicate triple. auto Duplicate = FoundNormalizedTriples.find(NormalizedName); if (Duplicate != FoundNormalizedTriples.end()) { Diag(clang::diag::warn_drv_omp_offload_target_duplicate) << Val << Duplicate->second; continue; } // Store the current triple so that we can check for duplicates in the // following iterations. FoundNormalizedTriples[NormalizedName] = Val; // If the specified target is invalid, emit a diagnostic. if (TT.getArch() == llvm::Triple::UnknownArch) Diag(clang::diag::err_drv_invalid_omp_target) << Val; else { const ToolChain *TC; // Device toolchains have to be selected differently. They pair host // and device in their implementation. 
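  // Hence the ToolChains key below combines both triples, e.g.
  // "nvptx64-nvidia-cuda/x86_64-unknown-linux-gnu", so the same device
  // triple paired with different hosts gets a distinct toolchain instance
  // (the key format is shown only for illustration).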
if (TT.isNVPTX() || TT.isAMDGCN()) { const ToolChain *HostTC = C.getSingleOffloadToolChain(); assert(HostTC && "Host toolchain should be always defined."); auto &DeviceTC = ToolChains[TT.str() + "/" + HostTC->getTriple().normalize()]; if (!DeviceTC) { if (TT.isNVPTX()) DeviceTC = std::make_unique( *this, TT, *HostTC, C.getInputArgs()); else if (TT.isAMDGCN()) DeviceTC = std::make_unique( *this, TT, *HostTC, C.getInputArgs()); else assert(DeviceTC && "Device toolchain not defined."); } TC = DeviceTC.get(); } else TC = &getToolChain(C.getInputArgs(), TT); C.addOffloadDeviceToolChain(TC, Action::OFK_OpenMP); if (DerivedArchs.contains(TT.getTriple())) KnownArchs[TC] = DerivedArchs[TT.getTriple()]; } } } else if (C.getInputArgs().hasArg(options::OPT_fopenmp_targets_EQ)) { Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); return; } // // TODO: Add support for other offloading programming models here. // } static void appendOneArg(InputArgList &Args, const Arg *Opt, const Arg *BaseArg) { // The args for config files or /clang: flags belong to different InputArgList // objects than Args. This copies an Arg from one of those other InputArgLists // to the ownership of Args. unsigned Index = Args.MakeIndex(Opt->getSpelling()); Arg *Copy = new llvm::opt::Arg(Opt->getOption(), Args.getArgString(Index), Index, BaseArg); Copy->getValues() = Opt->getValues(); if (Opt->isClaimed()) Copy->claim(); Copy->setOwnsValues(Opt->getOwnsValues()); Opt->setOwnsValues(false); Args.append(Copy); } bool Driver::readConfigFile(StringRef FileName, llvm::cl::ExpansionContext &ExpCtx) { // Try opening the given file. auto Status = getVFS().status(FileName); if (!Status) { Diag(diag::err_drv_cannot_open_config_file) << FileName << Status.getError().message(); return true; } if (Status->getType() != llvm::sys::fs::file_type::regular_file) { Diag(diag::err_drv_cannot_open_config_file) << FileName << "not a regular file"; return true; } // Try reading the given file. SmallVector NewCfgArgs; if (llvm::Error Err = ExpCtx.readConfigFile(FileName, NewCfgArgs)) { Diag(diag::err_drv_cannot_read_config_file) << FileName << toString(std::move(Err)); return true; } // Read options from config file. llvm::SmallString<128> CfgFileName(FileName); llvm::sys::path::native(CfgFileName); bool ContainErrors; std::unique_ptr NewOptions = std::make_unique( ParseArgStrings(NewCfgArgs, /*UseDriverMode=*/true, ContainErrors)); if (ContainErrors) return true; // Claim all arguments that come from a configuration file so that the driver // does not warn on any that is unused. for (Arg *A : *NewOptions) A->claim(); if (!CfgOptions) CfgOptions = std::move(NewOptions); else { // If this is a subsequent config file, append options to the previous one. for (auto *Opt : *NewOptions) { const Arg *BaseArg = &Opt->getBaseArg(); if (BaseArg == Opt) BaseArg = nullptr; appendOneArg(*CfgOptions, Opt, BaseArg); } } ConfigFiles.push_back(std::string(CfgFileName)); return false; } bool Driver::loadConfigFiles() { llvm::cl::ExpansionContext ExpCtx(Saver.getAllocator(), llvm::cl::tokenizeConfigFile); ExpCtx.setVFS(&getVFS()); // Process options that change search path for config files. 
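  // That is, --config-system-dir= and --config-user-dir= (if given) replace
  // the compiled-in defaults; an empty or unresolvable directory simply
  // clears the corresponding search location (option spellings assumed from
  // the OPT_config_*_dir_EQ IDs handled below).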
if (CLOptions) { if (CLOptions->hasArg(options::OPT_config_system_dir_EQ)) { SmallString<128> CfgDir; CfgDir.append( CLOptions->getLastArgValue(options::OPT_config_system_dir_EQ)); if (CfgDir.empty() || getVFS().makeAbsolute(CfgDir)) SystemConfigDir.clear(); else SystemConfigDir = static_cast(CfgDir); } if (CLOptions->hasArg(options::OPT_config_user_dir_EQ)) { SmallString<128> CfgDir; llvm::sys::fs::expand_tilde( CLOptions->getLastArgValue(options::OPT_config_user_dir_EQ), CfgDir); if (CfgDir.empty() || getVFS().makeAbsolute(CfgDir)) UserConfigDir.clear(); else UserConfigDir = static_cast(CfgDir); } } // Prepare list of directories where config file is searched for. StringRef CfgFileSearchDirs[] = {UserConfigDir, SystemConfigDir, Dir}; ExpCtx.setSearchDirs(CfgFileSearchDirs); // First try to load configuration from the default files, return on error. if (loadDefaultConfigFiles(ExpCtx)) return true; // Then load configuration files specified explicitly. SmallString<128> CfgFilePath; if (CLOptions) { for (auto CfgFileName : CLOptions->getAllArgValues(options::OPT_config)) { // If argument contains directory separator, treat it as a path to // configuration file. if (llvm::sys::path::has_parent_path(CfgFileName)) { CfgFilePath.assign(CfgFileName); if (llvm::sys::path::is_relative(CfgFilePath)) { if (getVFS().makeAbsolute(CfgFilePath)) { Diag(diag::err_drv_cannot_open_config_file) << CfgFilePath << "cannot get absolute path"; return true; } } } else if (!ExpCtx.findConfigFile(CfgFileName, CfgFilePath)) { // Report an error that the config file could not be found. Diag(diag::err_drv_config_file_not_found) << CfgFileName; for (const StringRef &SearchDir : CfgFileSearchDirs) if (!SearchDir.empty()) Diag(diag::note_drv_config_file_searched_in) << SearchDir; return true; } // Try to read the config file, return on error. if (readConfigFile(CfgFilePath, ExpCtx)) return true; } } // No error occurred. return false; } bool Driver::loadDefaultConfigFiles(llvm::cl::ExpansionContext &ExpCtx) { // Disable default config if CLANG_NO_DEFAULT_CONFIG is set to a non-empty // value. if (const char *NoConfigEnv = ::getenv("CLANG_NO_DEFAULT_CONFIG")) { if (*NoConfigEnv) return false; } if (CLOptions && CLOptions->hasArg(options::OPT_no_default_config)) return false; std::string RealMode = getExecutableForDriverMode(Mode); std::string Triple; // If name prefix is present, no --target= override was passed via CLOptions // and the name prefix is not a valid triple, force it for backwards // compatibility. if (!ClangNameParts.TargetPrefix.empty() && computeTargetTriple(*this, "/invalid/", *CLOptions).str() == "/invalid/") { llvm::Triple PrefixTriple{ClangNameParts.TargetPrefix}; if (PrefixTriple.getArch() == llvm::Triple::UnknownArch || PrefixTriple.isOSUnknown()) Triple = PrefixTriple.str(); } // Otherwise, use the real triple as used by the driver. if (Triple.empty()) { llvm::Triple RealTriple = computeTargetTriple(*this, TargetTriple, *CLOptions); Triple = RealTriple.str(); assert(!Triple.empty()); } // Search for config files in the following order: // 1. -.cfg using real driver mode // (e.g. i386-pc-linux-gnu-clang++.cfg). // 2. -.cfg using executable suffix // (e.g. i386-pc-linux-gnu-clang-g++.cfg for *clang-g++). // 3. .cfg + .cfg using real driver mode // (e.g. i386-pc-linux-gnu.cfg + clang++.cfg). // 4. .cfg + .cfg using executable suffix // (e.g. i386-pc-linux-gnu.cfg + clang-g++.cfg for *clang-g++). // Try loading -.cfg, and return if we find a match. 
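  // (Illustration of the first pattern listed above: an i386-pc-linux-gnu
  // triple in clang++ driver mode probes for "i386-pc-linux-gnu-clang++.cfg"
  // before falling back to the shorter file names.)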
SmallString<128> CfgFilePath; std::string CfgFileName = Triple + '-' + RealMode + ".cfg"; if (ExpCtx.findConfigFile(CfgFileName, CfgFilePath)) return readConfigFile(CfgFilePath, ExpCtx); bool TryModeSuffix = !ClangNameParts.ModeSuffix.empty() && ClangNameParts.ModeSuffix != RealMode; if (TryModeSuffix) { CfgFileName = Triple + '-' + ClangNameParts.ModeSuffix + ".cfg"; if (ExpCtx.findConfigFile(CfgFileName, CfgFilePath)) return readConfigFile(CfgFilePath, ExpCtx); } // Try loading .cfg, and return if loading failed. If a matching file // was not found, still proceed on to try .cfg. CfgFileName = RealMode + ".cfg"; if (ExpCtx.findConfigFile(CfgFileName, CfgFilePath)) { if (readConfigFile(CfgFilePath, ExpCtx)) return true; } else if (TryModeSuffix) { CfgFileName = ClangNameParts.ModeSuffix + ".cfg"; if (ExpCtx.findConfigFile(CfgFileName, CfgFilePath) && readConfigFile(CfgFilePath, ExpCtx)) return true; } // Try loading .cfg and return if we find a match. CfgFileName = Triple + ".cfg"; if (ExpCtx.findConfigFile(CfgFileName, CfgFilePath)) return readConfigFile(CfgFilePath, ExpCtx); // If we were unable to find a config file deduced from executable name, // that is not an error. return false; } Compilation *Driver::BuildCompilation(ArrayRef ArgList) { llvm::PrettyStackTraceString CrashInfo("Compilation construction"); // FIXME: Handle environment options which affect driver behavior, somewhere // (client?). GCC_EXEC_PREFIX, LPATH, CC_PRINT_OPTIONS. // We look for the driver mode option early, because the mode can affect // how other options are parsed. auto DriverMode = getDriverMode(ClangExecutable, ArgList.slice(1)); if (!DriverMode.empty()) setDriverMode(DriverMode); // FIXME: What are we going to do with -V and -b? // Arguments specified in command line. bool ContainsError; CLOptions = std::make_unique( ParseArgStrings(ArgList.slice(1), /*UseDriverMode=*/true, ContainsError)); // Try parsing configuration file. if (!ContainsError) ContainsError = loadConfigFiles(); bool HasConfigFile = !ContainsError && (CfgOptions.get() != nullptr); // All arguments, from both config file and command line. InputArgList Args = std::move(HasConfigFile ? std::move(*CfgOptions) : std::move(*CLOptions)); if (HasConfigFile) for (auto *Opt : *CLOptions) { if (Opt->getOption().matches(options::OPT_config)) continue; const Arg *BaseArg = &Opt->getBaseArg(); if (BaseArg == Opt) BaseArg = nullptr; appendOneArg(Args, Opt, BaseArg); } // In CL mode, look for any pass-through arguments if (IsCLMode() && !ContainsError) { SmallVector CLModePassThroughArgList; for (const auto *A : Args.filtered(options::OPT__SLASH_clang)) { A->claim(); CLModePassThroughArgList.push_back(A->getValue()); } if (!CLModePassThroughArgList.empty()) { // Parse any pass through args using default clang processing rather // than clang-cl processing. auto CLModePassThroughOptions = std::make_unique( ParseArgStrings(CLModePassThroughArgList, /*UseDriverMode=*/false, ContainsError)); if (!ContainsError) for (auto *Opt : *CLModePassThroughOptions) { appendOneArg(Args, Opt, nullptr); } } } // Check for working directory option before accessing any files if (Arg *WD = Args.getLastArg(options::OPT_working_directory)) if (VFS->setCurrentWorkingDirectory(WD->getValue())) Diag(diag::err_drv_unable_to_set_working_directory) << WD->getValue(); // Check for missing include directories. 
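  // That is, each directory passed through the -I option group is checked
  // for existence and missing ones are reported via the
  // warn_missing_include_dirs diagnostic (exact group membership is whatever
  // OPT_I_Group covers).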
if (!Diags.isIgnored(diag::warn_missing_include_dirs, SourceLocation())) { for (auto IncludeDir : Args.getAllArgValues(options::OPT_I_Group)) { if (!VFS->exists(IncludeDir)) Diag(diag::warn_missing_include_dirs) << IncludeDir; } } // FIXME: This stuff needs to go into the Compilation, not the driver. bool CCCPrintPhases; // -canonical-prefixes, -no-canonical-prefixes are used very early in main. Args.ClaimAllArgs(options::OPT_canonical_prefixes); Args.ClaimAllArgs(options::OPT_no_canonical_prefixes); // f(no-)integated-cc1 is also used very early in main. Args.ClaimAllArgs(options::OPT_fintegrated_cc1); Args.ClaimAllArgs(options::OPT_fno_integrated_cc1); // Ignore -pipe. Args.ClaimAllArgs(options::OPT_pipe); // Extract -ccc args. // // FIXME: We need to figure out where this behavior should live. Most of it // should be outside in the client; the parts that aren't should have proper // options, either by introducing new ones or by overloading gcc ones like -V // or -b. CCCPrintPhases = Args.hasArg(options::OPT_ccc_print_phases); CCCPrintBindings = Args.hasArg(options::OPT_ccc_print_bindings); if (const Arg *A = Args.getLastArg(options::OPT_ccc_gcc_name)) CCCGenericGCCName = A->getValue(); // Process -fproc-stat-report options. if (const Arg *A = Args.getLastArg(options::OPT_fproc_stat_report_EQ)) { CCPrintProcessStats = true; CCPrintStatReportFilename = A->getValue(); } if (Args.hasArg(options::OPT_fproc_stat_report)) CCPrintProcessStats = true; // FIXME: TargetTriple is used by the target-prefixed calls to as/ld // and getToolChain is const. if (IsCLMode()) { // clang-cl targets MSVC-style Win32. llvm::Triple T(TargetTriple); T.setOS(llvm::Triple::Win32); T.setVendor(llvm::Triple::PC); T.setEnvironment(llvm::Triple::MSVC); T.setObjectFormat(llvm::Triple::COFF); if (Args.hasArg(options::OPT__SLASH_arm64EC)) T.setArch(llvm::Triple::aarch64, llvm::Triple::AArch64SubArch_arm64ec); TargetTriple = T.str(); } else if (IsDXCMode()) { // Build TargetTriple from target_profile option for clang-dxc. if (const Arg *A = Args.getLastArg(options::OPT_target_profile)) { StringRef TargetProfile = A->getValue(); if (auto Triple = toolchains::HLSLToolChain::parseTargetProfile(TargetProfile)) TargetTriple = *Triple; else Diag(diag::err_drv_invalid_directx_shader_module) << TargetProfile; A->claim(); if (Args.hasArg(options::OPT_spirv)) { llvm::Triple T(TargetTriple); T.setArch(llvm::Triple::spirv); T.setOS(llvm::Triple::Vulkan); // Set specific Vulkan version if applicable. 
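  // Only "vulkan1.2" and "vulkan1.3" are accepted for -fspv-target-env= in
  // the check below; any other value is diagnosed as invalid.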
if (const Arg *A = Args.getLastArg(options::OPT_fspv_target_env_EQ)) { const llvm::StringSet<> ValidValues = {"vulkan1.2", "vulkan1.3"}; if (ValidValues.contains(A->getValue())) { T.setOSName(A->getValue()); } else { Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); } A->claim(); } TargetTriple = T.str(); } } else { Diag(diag::err_drv_dxc_missing_target_profile); } } if (const Arg *A = Args.getLastArg(options::OPT_target)) TargetTriple = A->getValue(); if (const Arg *A = Args.getLastArg(options::OPT_ccc_install_dir)) Dir = Dir = A->getValue(); for (const Arg *A : Args.filtered(options::OPT_B)) { A->claim(); PrefixDirs.push_back(A->getValue(0)); } if (std::optional CompilerPathValue = llvm::sys::Process::GetEnv("COMPILER_PATH")) { StringRef CompilerPath = *CompilerPathValue; while (!CompilerPath.empty()) { std::pair Split = CompilerPath.split(llvm::sys::EnvPathSeparator); PrefixDirs.push_back(std::string(Split.first)); CompilerPath = Split.second; } } if (const Arg *A = Args.getLastArg(options::OPT__sysroot_EQ)) SysRoot = A->getValue(); if (const Arg *A = Args.getLastArg(options::OPT__dyld_prefix_EQ)) DyldPrefix = A->getValue(); if (const Arg *A = Args.getLastArg(options::OPT_resource_dir)) ResourceDir = A->getValue(); if (const Arg *A = Args.getLastArg(options::OPT_save_temps_EQ)) { SaveTemps = llvm::StringSwitch(A->getValue()) .Case("cwd", SaveTempsCwd) .Case("obj", SaveTempsObj) .Default(SaveTempsCwd); } if (const Arg *A = Args.getLastArg(options::OPT_offload_host_only, options::OPT_offload_device_only, options::OPT_offload_host_device)) { if (A->getOption().matches(options::OPT_offload_host_only)) Offload = OffloadHost; else if (A->getOption().matches(options::OPT_offload_device_only)) Offload = OffloadDevice; else Offload = OffloadHostDevice; } setLTOMode(Args); // Process -fembed-bitcode= flags. if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) { StringRef Name = A->getValue(); unsigned Model = llvm::StringSwitch(Name) .Case("off", EmbedNone) .Case("all", EmbedBitcode) .Case("bitcode", EmbedBitcode) .Case("marker", EmbedMarker) .Default(~0U); if (Model == ~0U) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name; } else BitcodeEmbed = static_cast(Model); } // Remove existing compilation database so that each job can append to it. if (Arg *A = Args.getLastArg(options::OPT_MJ)) llvm::sys::fs::remove(A->getValue()); // Setting up the jobs for some precompile cases depends on whether we are // treating them as PCH, implicit modules or C++20 ones. // TODO: inferring the mode like this seems fragile (it meets the objective // of not requiring anything new for operation, however). const Arg *Std = Args.getLastArg(options::OPT_std_EQ); ModulesModeCXX20 = !Args.hasArg(options::OPT_fmodules) && Std && (Std->containsValue("c++20") || Std->containsValue("c++2a") || Std->containsValue("c++23") || Std->containsValue("c++2b") || Std->containsValue("c++26") || Std->containsValue("c++2c") || Std->containsValue("c++latest")); // Process -fmodule-header{=} flags. if (Arg *A = Args.getLastArg(options::OPT_fmodule_header_EQ, options::OPT_fmodule_header)) { // These flags force C++20 handling of headers. 
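  // -fmodule-header defaults the header kind, while -fmodule-header=user and
  // -fmodule-header=system select it explicitly; any other value is rejected
  // below.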
ModulesModeCXX20 = true; if (A->getOption().matches(options::OPT_fmodule_header)) CXX20HeaderType = HeaderMode_Default; else { StringRef ArgName = A->getValue(); unsigned Kind = llvm::StringSwitch(ArgName) .Case("user", HeaderMode_User) .Case("system", HeaderMode_System) .Default(~0U); if (Kind == ~0U) { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << ArgName; } else CXX20HeaderType = static_cast(Kind); } } std::unique_ptr UArgs = std::make_unique(std::move(Args)); // Perform the default argument translations. DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs); // Owned by the host. const ToolChain &TC = getToolChain( *UArgs, computeTargetTriple(*this, TargetTriple, *UArgs)); // Check if the environment version is valid except wasm case. llvm::Triple Triple = TC.getTriple(); if (!Triple.isWasm()) { StringRef TripleVersionName = Triple.getEnvironmentVersionString(); StringRef TripleObjectFormat = Triple.getObjectFormatTypeName(Triple.getObjectFormat()); if (Triple.getEnvironmentVersion().empty() && TripleVersionName != "" && TripleVersionName != TripleObjectFormat) { Diags.Report(diag::err_drv_triple_version_invalid) << TripleVersionName << TC.getTripleString(); ContainsError = true; } } // Report warning when arm64EC option is overridden by specified target if ((TC.getTriple().getArch() != llvm::Triple::aarch64 || TC.getTriple().getSubArch() != llvm::Triple::AArch64SubArch_arm64ec) && UArgs->hasArg(options::OPT__SLASH_arm64EC)) { getDiags().Report(clang::diag::warn_target_override_arm64ec) << TC.getTriple().str(); } // A common user mistake is specifying a target of aarch64-none-eabi or // arm-none-elf whereas the correct names are aarch64-none-elf & // arm-none-eabi. Detect these cases and issue a warning. if (TC.getTriple().getOS() == llvm::Triple::UnknownOS && TC.getTriple().getVendor() == llvm::Triple::UnknownVendor) { switch (TC.getTriple().getArch()) { case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: if (TC.getTriple().getEnvironmentName() == "elf") { Diag(diag::warn_target_unrecognized_env) << TargetTriple << (TC.getTriple().getArchName().str() + "-none-eabi"); } break; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::aarch64_32: if (TC.getTriple().getEnvironmentName().starts_with("eabi")) { Diag(diag::warn_target_unrecognized_env) << TargetTriple << (TC.getTriple().getArchName().str() + "-none-elf"); } break; default: break; } } // The compilation takes ownership of Args. Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs, ContainsError); if (!HandleImmediateArgs(*C)) return C; // Construct the list of inputs. InputList Inputs; BuildInputs(C->getDefaultToolChain(), *TranslatedArgs, Inputs); // Populate the tool chains for the offloading devices, if any. CreateOffloadingDeviceToolChains(*C, Inputs); // Construct the list of abstract actions to perform for this compilation. On // MachO targets this uses the driver-driver and universal actions. if (TC.getTriple().isOSBinFormatMachO()) BuildUniversalActions(*C, C->getDefaultToolChain(), Inputs); else BuildActions(*C, C->getArgs(), Inputs, C->getActions()); if (CCCPrintPhases) { PrintActions(*C); return C; } BuildJobs(*C); return C; } static void printArgList(raw_ostream &OS, const llvm::opt::ArgList &Args) { llvm::opt::ArgStringList ASL; for (const auto *A : Args) { // Use user's original spelling of flags. 
For example, use // `/source-charset:utf-8` instead of `-finput-charset=utf-8` if the user // wrote the former. while (A->getAlias()) A = A->getAlias(); A->render(Args, ASL); } for (auto I = ASL.begin(), E = ASL.end(); I != E; ++I) { if (I != ASL.begin()) OS << ' '; llvm::sys::printArg(OS, *I, true); } OS << '\n'; } bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename, SmallString<128> &CrashDiagDir) { using namespace llvm::sys; assert(llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin() && "Only knows about .crash files on Darwin"); // The .crash file can be found on at ~/Library/Logs/DiagnosticReports/ // (or /Library/Logs/DiagnosticReports for root) and has the filename pattern // clang-__.crash. path::home_directory(CrashDiagDir); if (CrashDiagDir.starts_with("/var/root")) CrashDiagDir = "/"; path::append(CrashDiagDir, "Library/Logs/DiagnosticReports"); int PID = #if LLVM_ON_UNIX getpid(); #else 0; #endif std::error_code EC; fs::file_status FileStatus; TimePoint<> LastAccessTime; SmallString<128> CrashFilePath; // Lookup the .crash files and get the one generated by a subprocess spawned // by this driver invocation. for (fs::directory_iterator File(CrashDiagDir, EC), FileEnd; File != FileEnd && !EC; File.increment(EC)) { StringRef FileName = path::filename(File->path()); if (!FileName.starts_with(Name)) continue; if (fs::status(File->path(), FileStatus)) continue; llvm::ErrorOr> CrashFile = llvm::MemoryBuffer::getFile(File->path()); if (!CrashFile) continue; // The first line should start with "Process:", otherwise this isn't a real // .crash file. StringRef Data = CrashFile.get()->getBuffer(); if (!Data.starts_with("Process:")) continue; // Parse parent process pid line, e.g: "Parent Process: clang-4.0 [79141]" size_t ParentProcPos = Data.find("Parent Process:"); if (ParentProcPos == StringRef::npos) continue; size_t LineEnd = Data.find_first_of("\n", ParentProcPos); if (LineEnd == StringRef::npos) continue; StringRef ParentProcess = Data.slice(ParentProcPos+15, LineEnd).trim(); int OpenBracket = -1, CloseBracket = -1; for (size_t i = 0, e = ParentProcess.size(); i < e; ++i) { if (ParentProcess[i] == '[') OpenBracket = i; if (ParentProcess[i] == ']') CloseBracket = i; } // Extract the parent process PID from the .crash file and check whether // it matches this driver invocation pid. int CrashPID; if (OpenBracket < 0 || CloseBracket < 0 || ParentProcess.slice(OpenBracket + 1, CloseBracket) .getAsInteger(10, CrashPID) || CrashPID != PID) { continue; } // Found a .crash file matching the driver pid. To avoid getting an older // and misleading crash file, continue looking for the most recent. // FIXME: the driver can dispatch multiple cc1 invocations, leading to // multiple crashes poiting to the same parent process. Since the driver // does not collect pid information for the dispatched invocation there's // currently no way to distinguish among them. const auto FileAccessTime = FileStatus.getLastModificationTime(); if (FileAccessTime > LastAccessTime) { CrashFilePath.assign(File->path()); LastAccessTime = FileAccessTime; } } // If found, copy it over to the location of other reproducer files. 
if (!CrashFilePath.empty()) { EC = fs::copy_file(CrashFilePath, ReproCrashFilename); if (EC) return false; return true; } return false; } static const char BugReporMsg[] = "\n********************\n\n" "PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:\n" "Preprocessed source(s) and associated run script(s) are located at:"; // When clang crashes, produce diagnostic information including the fully // preprocessed source file(s). Request that the developer attach the // diagnostic information to a bug report. void Driver::generateCompilationDiagnostics( Compilation &C, const Command &FailingCommand, StringRef AdditionalInformation, CompilationDiagnosticReport *Report) { if (C.getArgs().hasArg(options::OPT_fno_crash_diagnostics)) return; unsigned Level = 1; if (Arg *A = C.getArgs().getLastArg(options::OPT_fcrash_diagnostics_EQ)) { Level = llvm::StringSwitch(A->getValue()) .Case("off", 0) .Case("compiler", 1) .Case("all", 2) .Default(1); } if (!Level) return; // Don't try to generate diagnostics for dsymutil jobs. if (FailingCommand.getCreator().isDsymutilJob()) return; bool IsLLD = false; ArgStringList SavedTemps; if (FailingCommand.getCreator().isLinkJob()) { C.getDefaultToolChain().GetLinkerPath(&IsLLD); if (!IsLLD || Level < 2) return; // If lld crashed, we will re-run the same command with the input it used // to have. In that case we should not remove temp files in // initCompilationForDiagnostics yet. They will be added back and removed // later. SavedTemps = std::move(C.getTempFiles()); assert(!C.getTempFiles().size()); } // Print the version of the compiler. PrintVersion(C, llvm::errs()); // Suppress driver output and emit preprocessor output to temp file. CCGenDiagnostics = true; // Save the original job command(s). Command Cmd = FailingCommand; // Keep track of whether we produce any errors while trying to produce // preprocessed sources. DiagnosticErrorTrap Trap(Diags); // Suppress tool output. C.initCompilationForDiagnostics(); // If lld failed, rerun it again with --reproduce. if (IsLLD) { const char *TmpName = CreateTempFile(C, "linker-crash", "tar"); Command NewLLDInvocation = Cmd; llvm::opt::ArgStringList ArgList = NewLLDInvocation.getArguments(); StringRef ReproduceOption = C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment() ? "/reproduce:" : "--reproduce="; ArgList.push_back(Saver.save(Twine(ReproduceOption) + TmpName).data()); NewLLDInvocation.replaceArguments(std::move(ArgList)); // Redirect stdout/stderr to /dev/null. NewLLDInvocation.Execute({std::nullopt, {""}, {""}}, nullptr, nullptr); Diag(clang::diag::note_drv_command_failed_diag_msg) << BugReporMsg; Diag(clang::diag::note_drv_command_failed_diag_msg) << TmpName; Diag(clang::diag::note_drv_command_failed_diag_msg) << "\n\n********************"; if (Report) Report->TemporaryFiles.push_back(TmpName); return; } // Construct the list of inputs. InputList Inputs; BuildInputs(C.getDefaultToolChain(), C.getArgs(), Inputs); for (InputList::iterator it = Inputs.begin(), ie = Inputs.end(); it != ie;) { bool IgnoreInput = false; // Ignore input from stdin or any inputs that cannot be preprocessed. // Check type first as not all linker inputs have a value. 
if (types::getPreprocessedType(it->first) == types::TY_INVALID) { IgnoreInput = true; } else if (!strcmp(it->second->getValue(), "-")) { Diag(clang::diag::note_drv_command_failed_diag_msg) << "Error generating preprocessed source(s) - " "ignoring input from stdin."; IgnoreInput = true; } if (IgnoreInput) { it = Inputs.erase(it); ie = Inputs.end(); } else { ++it; } } if (Inputs.empty()) { Diag(clang::diag::note_drv_command_failed_diag_msg) << "Error generating preprocessed source(s) - " "no preprocessable inputs."; return; } // Don't attempt to generate preprocessed files if multiple -arch options are // used, unless they're all duplicates. llvm::StringSet<> ArchNames; for (const Arg *A : C.getArgs()) { if (A->getOption().matches(options::OPT_arch)) { StringRef ArchName = A->getValue(); ArchNames.insert(ArchName); } } if (ArchNames.size() > 1) { Diag(clang::diag::note_drv_command_failed_diag_msg) << "Error generating preprocessed source(s) - cannot generate " "preprocessed source with multiple -arch options."; return; } // Construct the list of abstract actions to perform for this compilation. On // Darwin OSes this uses the driver-driver and builds universal actions. const ToolChain &TC = C.getDefaultToolChain(); if (TC.getTriple().isOSBinFormatMachO()) BuildUniversalActions(C, TC, Inputs); else BuildActions(C, C.getArgs(), Inputs, C.getActions()); BuildJobs(C); // If there were errors building the compilation, quit now. if (Trap.hasErrorOccurred()) { Diag(clang::diag::note_drv_command_failed_diag_msg) << "Error generating preprocessed source(s)."; return; } // Generate preprocessed output. SmallVector, 4> FailingCommands; C.ExecuteJobs(C.getJobs(), FailingCommands); // If any of the preprocessing commands failed, clean up and exit. if (!FailingCommands.empty()) { Diag(clang::diag::note_drv_command_failed_diag_msg) << "Error generating preprocessed source(s)."; return; } const ArgStringList &TempFiles = C.getTempFiles(); if (TempFiles.empty()) { Diag(clang::diag::note_drv_command_failed_diag_msg) << "Error generating preprocessed source(s)."; return; } Diag(clang::diag::note_drv_command_failed_diag_msg) << BugReporMsg; SmallString<128> VFS; SmallString<128> ReproCrashFilename; for (const char *TempFile : TempFiles) { Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile; if (Report) Report->TemporaryFiles.push_back(TempFile); if (ReproCrashFilename.empty()) { ReproCrashFilename = TempFile; llvm::sys::path::replace_extension(ReproCrashFilename, ".crash"); } if (StringRef(TempFile).ends_with(".cache")) { // In some cases (modules) we'll dump extra data to help with reproducing // the crash into a directory next to the output. VFS = llvm::sys::path::filename(TempFile); llvm::sys::path::append(VFS, "vfs", "vfs.yaml"); } } for (const char *TempFile : SavedTemps) C.addTempFile(TempFile); // Assume associated files are based off of the first temporary file. 
CrashReportInfo CrashInfo(TempFiles[0], VFS); llvm::SmallString<128> Script(CrashInfo.Filename); llvm::sys::path::replace_extension(Script, "sh"); std::error_code EC; llvm::raw_fd_ostream ScriptOS(Script, EC, llvm::sys::fs::CD_CreateNew, llvm::sys::fs::FA_Write, llvm::sys::fs::OF_Text); if (EC) { Diag(clang::diag::note_drv_command_failed_diag_msg) << "Error generating run script: " << Script << " " << EC.message(); } else { ScriptOS << "# Crash reproducer for " << getClangFullVersion() << "\n" << "# Driver args: "; printArgList(ScriptOS, C.getInputArgs()); ScriptOS << "# Original command: "; Cmd.Print(ScriptOS, "\n", /*Quote=*/true); Cmd.Print(ScriptOS, "\n", /*Quote=*/true, &CrashInfo); if (!AdditionalInformation.empty()) ScriptOS << "\n# Additional information: " << AdditionalInformation << "\n"; if (Report) Report->TemporaryFiles.push_back(std::string(Script)); Diag(clang::diag::note_drv_command_failed_diag_msg) << Script; } // On darwin, provide information about the .crash diagnostic report. if (llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin()) { SmallString<128> CrashDiagDir; if (getCrashDiagnosticFile(ReproCrashFilename, CrashDiagDir)) { Diag(clang::diag::note_drv_command_failed_diag_msg) << ReproCrashFilename.str(); } else { // Suggest a directory for the user to look for .crash files. llvm::sys::path::append(CrashDiagDir, Name); CrashDiagDir += "__.crash"; Diag(clang::diag::note_drv_command_failed_diag_msg) << "Crash backtrace is located in"; Diag(clang::diag::note_drv_command_failed_diag_msg) << CrashDiagDir.str(); Diag(clang::diag::note_drv_command_failed_diag_msg) << "(choose the .crash file that corresponds to your crash)"; } } Diag(clang::diag::note_drv_command_failed_diag_msg) << "\n\n********************"; } void Driver::setUpResponseFiles(Compilation &C, Command &Cmd) { // Since commandLineFitsWithinSystemLimits() may underestimate system's // capacity if the tool does not support response files, there is a chance/ // that things will just work without a response file, so we silently just // skip it. if (Cmd.getResponseFileSupport().ResponseKind == ResponseFileSupport::RF_None || llvm::sys::commandLineFitsWithinSystemLimits(Cmd.getExecutable(), Cmd.getArguments())) return; std::string TmpName = GetTemporaryPath("response", "txt"); Cmd.setResponseFile(C.addTempFile(C.getArgs().MakeArgString(TmpName))); } int Driver::ExecuteCompilation( Compilation &C, SmallVectorImpl> &FailingCommands) { if (C.getArgs().hasArg(options::OPT_fdriver_only)) { if (C.getArgs().hasArg(options::OPT_v)) C.getJobs().Print(llvm::errs(), "\n", true); C.ExecuteJobs(C.getJobs(), FailingCommands, /*LogOnly=*/true); // If there were errors building the compilation, quit now. if (!FailingCommands.empty() || Diags.hasErrorOccurred()) return 1; return 0; } // Just print if -### was present. if (C.getArgs().hasArg(options::OPT__HASH_HASH_HASH)) { C.getJobs().Print(llvm::errs(), "\n", true); return Diags.hasErrorOccurred() ? 1 : 0; } // If there were errors building the compilation, quit now. if (Diags.hasErrorOccurred()) return 1; // Set up response file names for each command, if necessary. for (auto &Job : C.getJobs()) setUpResponseFiles(C, Job); C.ExecuteJobs(C.getJobs(), FailingCommands); // If the command succeeded, we are done. if (FailingCommands.empty()) return 0; // Otherwise, remove result files and print extra information about abnormal // failures. 
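  // A result of EX_IOERR is the SIGPIPE-style special exit and is passed
  // through without extra diagnostics, while a negative result means the
  // tool died on a signal (see the per-command handling below).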
int Res = 0; for (const auto &CmdPair : FailingCommands) { int CommandRes = CmdPair.first; const Command *FailingCommand = CmdPair.second; // Remove result files if we're not saving temps. if (!isSaveTempsEnabled()) { const JobAction *JA = cast(&FailingCommand->getSource()); C.CleanupFileMap(C.getResultFiles(), JA, true); // Failure result files are valid unless we crashed. if (CommandRes < 0) C.CleanupFileMap(C.getFailureResultFiles(), JA, true); } // llvm/lib/Support/*/Signals.inc will exit with a special return code // for SIGPIPE. Do not print diagnostics for this case. if (CommandRes == EX_IOERR) { Res = CommandRes; continue; } // Print extra information about abnormal failures, if possible. // // This is ad-hoc, but we don't want to be excessively noisy. If the result // status was 1, assume the command failed normally. In particular, if it // was the compiler then assume it gave a reasonable error code. Failures // in other tools are less common, and they generally have worse // diagnostics, so always print the diagnostic there. const Tool &FailingTool = FailingCommand->getCreator(); if (!FailingCommand->getCreator().hasGoodDiagnostics() || CommandRes != 1) { // FIXME: See FIXME above regarding result code interpretation. if (CommandRes < 0) Diag(clang::diag::err_drv_command_signalled) << FailingTool.getShortName(); else Diag(clang::diag::err_drv_command_failed) << FailingTool.getShortName() << CommandRes; } } return Res; } void Driver::PrintHelp(bool ShowHidden) const { llvm::opt::Visibility VisibilityMask = getOptionVisibilityMask(); std::string Usage = llvm::formatv("{0} [options] file...", Name).str(); getOpts().printHelp(llvm::outs(), Usage.c_str(), DriverTitle.c_str(), ShowHidden, /*ShowAllAliases=*/false, VisibilityMask); } void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const { if (IsFlangMode()) { OS << getClangToolFullVersion("flang-new") << '\n'; } else { // FIXME: The following handlers should use a callback mechanism, we don't // know what the client would like to do. OS << getClangFullVersion() << '\n'; } const ToolChain &TC = C.getDefaultToolChain(); OS << "Target: " << TC.getTripleString() << '\n'; // Print the threading model. if (Arg *A = C.getArgs().getLastArg(options::OPT_mthread_model)) { // Don't print if the ToolChain would have barfed on it already if (TC.isThreadModelSupported(A->getValue())) OS << "Thread model: " << A->getValue(); } else OS << "Thread model: " << TC.getThreadModel(); OS << '\n'; // Print out the install directory. OS << "InstalledDir: " << Dir << '\n'; // Print the build config if it's non-default. // Intended to help LLVM developers understand the configs of compilers // they're investigating. if (!llvm::cl::getCompilerBuildConfig().empty()) llvm::cl::printBuildConfig(OS); // If configuration files were used, print their paths. for (auto ConfigFile : ConfigFiles) OS << "Configuration file: " << ConfigFile << '\n'; } /// PrintDiagnosticCategories - Implement the --print-diagnostic-categories /// option. static void PrintDiagnosticCategories(raw_ostream &OS) { // Skip the empty category. for (unsigned i = 1, max = DiagnosticIDs::getNumberOfCategories(); i != max; ++i) OS << i << ',' << DiagnosticIDs::getCategoryNameFromID(i) << '\n'; } void Driver::HandleAutocompletions(StringRef PassedFlags) const { if (PassedFlags == "") return; // Print out all options that start with a given argument. This is used for // shell autocompletion. 
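  // For example, "clang --autocomplete=-fsyn" prints the flags beginning
  // with "-fsyn" (such as -fsyntax-only), and a trailing comma in the passed
  // string signals that the shell should fall back to file completion.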
std::vector SuggestedCompletions; std::vector Flags; llvm::opt::Visibility VisibilityMask(options::ClangOption); // Make sure that Flang-only options don't pollute the Clang output // TODO: Make sure that Clang-only options don't pollute Flang output if (IsFlangMode()) VisibilityMask = llvm::opt::Visibility(options::FlangOption); // Distinguish "--autocomplete=-someflag" and "--autocomplete=-someflag," // because the latter indicates that the user put space before pushing tab // which should end up in a file completion. const bool HasSpace = PassedFlags.ends_with(","); // Parse PassedFlags by "," as all the command-line flags are passed to this // function separated by "," StringRef TargetFlags = PassedFlags; while (TargetFlags != "") { StringRef CurFlag; std::tie(CurFlag, TargetFlags) = TargetFlags.split(","); Flags.push_back(std::string(CurFlag)); } // We want to show cc1-only options only when clang is invoked with -cc1 or // -Xclang. if (llvm::is_contained(Flags, "-Xclang") || llvm::is_contained(Flags, "-cc1")) VisibilityMask = llvm::opt::Visibility(options::CC1Option); const llvm::opt::OptTable &Opts = getOpts(); StringRef Cur; Cur = Flags.at(Flags.size() - 1); StringRef Prev; if (Flags.size() >= 2) { Prev = Flags.at(Flags.size() - 2); SuggestedCompletions = Opts.suggestValueCompletions(Prev, Cur); } if (SuggestedCompletions.empty()) SuggestedCompletions = Opts.suggestValueCompletions(Cur, ""); // If Flags were empty, it means the user typed `clang [tab]` where we should // list all possible flags. If there was no value completion and the user // pressed tab after a space, we should fall back to a file completion. // We're printing a newline to be consistent with what we print at the end of // this function. if (SuggestedCompletions.empty() && HasSpace && !Flags.empty()) { llvm::outs() << '\n'; return; } // When flag ends with '=' and there was no value completion, return empty // string and fall back to the file autocompletion. if (SuggestedCompletions.empty() && !Cur.ends_with("=")) { // If the flag is in the form of "--autocomplete=-foo", // we were requested to print out all option names that start with "-foo". // For example, "--autocomplete=-fsyn" is expanded to "-fsyntax-only". SuggestedCompletions = Opts.findByPrefix( Cur, VisibilityMask, /*DisableFlags=*/options::Unsupported | options::Ignored); // We have to query the -W flags manually as they're not in the OptTable. // TODO: Find a good way to add them to OptTable instead and them remove // this code. for (StringRef S : DiagnosticIDs::getDiagnosticFlags()) if (S.starts_with(Cur)) SuggestedCompletions.push_back(std::string(S)); } // Sort the autocomplete candidates so that shells print them out in a // deterministic order. We could sort in any way, but we chose // case-insensitive sorting for consistency with the -help option // which prints out options in the case-insensitive alphabetical order. llvm::sort(SuggestedCompletions, [](StringRef A, StringRef B) { if (int X = A.compare_insensitive(B)) return X < 0; return A.compare(B) > 0; }); llvm::outs() << llvm::join(SuggestedCompletions, "\n") << '\n'; } bool Driver::HandleImmediateArgs(Compilation &C) { // The order these options are handled in gcc is all over the place, but we // don't expect inconsistencies w.r.t. that to matter in practice. 
if (C.getArgs().hasArg(options::OPT_dumpmachine)) { llvm::outs() << C.getDefaultToolChain().getTripleString() << '\n'; return false; } if (C.getArgs().hasArg(options::OPT_dumpversion)) { // Since -dumpversion is only implemented for pedantic GCC compatibility, we // return an answer which matches our definition of __VERSION__. llvm::outs() << CLANG_VERSION_STRING << "\n"; return false; } if (C.getArgs().hasArg(options::OPT__print_diagnostic_categories)) { PrintDiagnosticCategories(llvm::outs()); return false; } if (C.getArgs().hasArg(options::OPT_help) || C.getArgs().hasArg(options::OPT__help_hidden)) { PrintHelp(C.getArgs().hasArg(options::OPT__help_hidden)); return false; } if (C.getArgs().hasArg(options::OPT__version)) { // Follow gcc behavior and use stdout for --version and stderr for -v. PrintVersion(C, llvm::outs()); return false; } if (C.getArgs().hasArg(options::OPT_v) || C.getArgs().hasArg(options::OPT__HASH_HASH_HASH) || C.getArgs().hasArg(options::OPT_print_supported_cpus) || C.getArgs().hasArg(options::OPT_print_supported_extensions) || C.getArgs().hasArg(options::OPT_print_enabled_extensions)) { PrintVersion(C, llvm::errs()); SuppressMissingInputWarning = true; } if (C.getArgs().hasArg(options::OPT_v)) { if (!SystemConfigDir.empty()) llvm::errs() << "System configuration file directory: " << SystemConfigDir << "\n"; if (!UserConfigDir.empty()) llvm::errs() << "User configuration file directory: " << UserConfigDir << "\n"; } const ToolChain &TC = C.getDefaultToolChain(); if (C.getArgs().hasArg(options::OPT_v)) TC.printVerboseInfo(llvm::errs()); if (C.getArgs().hasArg(options::OPT_print_resource_dir)) { llvm::outs() << ResourceDir << '\n'; return false; } if (C.getArgs().hasArg(options::OPT_print_search_dirs)) { llvm::outs() << "programs: ="; bool separator = false; // Print -B and COMPILER_PATH. for (const std::string &Path : PrefixDirs) { if (separator) llvm::outs() << llvm::sys::EnvPathSeparator; llvm::outs() << Path; separator = true; } for (const std::string &Path : TC.getProgramPaths()) { if (separator) llvm::outs() << llvm::sys::EnvPathSeparator; llvm::outs() << Path; separator = true; } llvm::outs() << "\n"; llvm::outs() << "libraries: =" << ResourceDir; StringRef sysroot = C.getSysRoot(); for (const std::string &Path : TC.getFilePaths()) { // Always print a separator. ResourceDir was the first item shown. llvm::outs() << llvm::sys::EnvPathSeparator; // Interpretation of leading '=' is needed only for NetBSD. if (Path[0] == '=') llvm::outs() << sysroot << Path.substr(1); else llvm::outs() << Path; } llvm::outs() << "\n"; return false; } if (C.getArgs().hasArg(options::OPT_print_std_module_manifest_path)) { llvm::outs() << GetStdModuleManifestPath(C, C.getDefaultToolChain()) << '\n'; return false; } if (C.getArgs().hasArg(options::OPT_print_runtime_dir)) { if (std::optional RuntimePath = TC.getRuntimePath()) llvm::outs() << *RuntimePath << '\n'; else llvm::outs() << TC.getCompilerRTPath() << '\n'; return false; } if (C.getArgs().hasArg(options::OPT_print_diagnostic_options)) { std::vector Flags = DiagnosticIDs::getDiagnosticFlags(); for (std::size_t I = 0; I != Flags.size(); I += 2) llvm::outs() << " " << Flags[I] << "\n " << Flags[I + 1] << "\n\n"; return false; } // FIXME: The following handlers should use a callback mechanism, we don't // know what the client would like to do. 
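  // These are the "print one value and exit" queries, e.g.
  // -print-file-name=libc.a or -print-prog-name=ld, each answered from the
  // selected toolchain's search paths (the file and program names here are
  // just examples).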
if (Arg *A = C.getArgs().getLastArg(options::OPT_print_file_name_EQ)) { llvm::outs() << GetFilePath(A->getValue(), TC) << "\n"; return false; } if (Arg *A = C.getArgs().getLastArg(options::OPT_print_prog_name_EQ)) { StringRef ProgName = A->getValue(); // Null program name cannot have a path. if (! ProgName.empty()) llvm::outs() << GetProgramPath(ProgName, TC); llvm::outs() << "\n"; return false; } if (Arg *A = C.getArgs().getLastArg(options::OPT_autocomplete)) { StringRef PassedFlags = A->getValue(); HandleAutocompletions(PassedFlags); return false; } if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) { ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs()); const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs())); // The 'Darwin' toolchain is initialized only when its arguments are // computed. Get the default arguments for OFK_None to ensure that // initialization is performed before trying to access properties of // the toolchain in the functions below. // FIXME: Remove when darwin's toolchain is initialized during construction. // FIXME: For some more esoteric targets the default toolchain is not the // correct one. C.getArgsForToolChain(&TC, Triple.getArchName(), Action::OFK_None); RegisterEffectiveTriple TripleRAII(TC, Triple); switch (RLT) { case ToolChain::RLT_CompilerRT: llvm::outs() << TC.getCompilerRT(C.getArgs(), "builtins") << "\n"; break; case ToolChain::RLT_Libgcc: llvm::outs() << GetFilePath("libgcc.a", TC) << "\n"; break; } return false; } if (C.getArgs().hasArg(options::OPT_print_multi_lib)) { for (const Multilib &Multilib : TC.getMultilibs()) llvm::outs() << Multilib << "\n"; return false; } if (C.getArgs().hasArg(options::OPT_print_multi_flags)) { Multilib::flags_list ArgFlags = TC.getMultilibFlags(C.getArgs()); llvm::StringSet<> ExpandedFlags = TC.getMultilibs().expandFlags(ArgFlags); std::set SortedFlags; for (const auto &FlagEntry : ExpandedFlags) SortedFlags.insert(FlagEntry.getKey()); for (auto Flag : SortedFlags) llvm::outs() << Flag << '\n'; return false; } if (C.getArgs().hasArg(options::OPT_print_multi_directory)) { for (const Multilib &Multilib : TC.getSelectedMultilibs()) { if (Multilib.gccSuffix().empty()) llvm::outs() << ".\n"; else { StringRef Suffix(Multilib.gccSuffix()); assert(Suffix.front() == '/'); llvm::outs() << Suffix.substr(1) << "\n"; } } return false; } if (C.getArgs().hasArg(options::OPT_print_target_triple)) { llvm::outs() << TC.getTripleString() << "\n"; return false; } if (C.getArgs().hasArg(options::OPT_print_effective_triple)) { const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs())); llvm::outs() << Triple.getTriple() << "\n"; return false; } if (C.getArgs().hasArg(options::OPT_print_targets)) { llvm::TargetRegistry::printRegisteredTargetsForVersion(llvm::outs()); return false; } return true; } enum { TopLevelAction = 0, HeadSibAction = 1, OtherSibAction = 2, }; // Display an action graph human-readably. Action A is the "sink" node // and latest-occuring action. Traversal is in pre-order, visiting the // inputs to each action before printing the action itself. static unsigned PrintActions1(const Compilation &C, Action *A, std::map &Ids, Twine Indent = {}, int Kind = TopLevelAction) { if (Ids.count(A)) // A was already visited. return Ids[A]; std::string str; llvm::raw_string_ostream os(str); auto getSibIndent = [](int K) -> Twine { return (K == HeadSibAction) ? " " : (K == OtherSibAction) ? 
"| " : ""; }; Twine SibIndent = Indent + getSibIndent(Kind); int SibKind = HeadSibAction; os << Action::getClassName(A->getKind()) << ", "; if (InputAction *IA = dyn_cast(A)) { os << "\"" << IA->getInputArg().getValue() << "\""; } else if (BindArchAction *BIA = dyn_cast(A)) { os << '"' << BIA->getArchName() << '"' << ", {" << PrintActions1(C, *BIA->input_begin(), Ids, SibIndent, SibKind) << "}"; } else if (OffloadAction *OA = dyn_cast(A)) { bool IsFirst = true; OA->doOnEachDependence( [&](Action *A, const ToolChain *TC, const char *BoundArch) { assert(TC && "Unknown host toolchain"); // E.g. for two CUDA device dependences whose bound arch is sm_20 and // sm_35 this will generate: // "cuda-device" (nvptx64-nvidia-cuda:sm_20) {#ID}, "cuda-device" // (nvptx64-nvidia-cuda:sm_35) {#ID} if (!IsFirst) os << ", "; os << '"'; os << A->getOffloadingKindPrefix(); os << " ("; os << TC->getTriple().normalize(); if (BoundArch) os << ":" << BoundArch; os << ")"; os << '"'; os << " {" << PrintActions1(C, A, Ids, SibIndent, SibKind) << "}"; IsFirst = false; SibKind = OtherSibAction; }); } else { const ActionList *AL = &A->getInputs(); if (AL->size()) { const char *Prefix = "{"; for (Action *PreRequisite : *AL) { os << Prefix << PrintActions1(C, PreRequisite, Ids, SibIndent, SibKind); Prefix = ", "; SibKind = OtherSibAction; } os << "}"; } else os << "{}"; } // Append offload info for all options other than the offloading action // itself (e.g. (cuda-device, sm_20) or (cuda-host)). std::string offload_str; llvm::raw_string_ostream offload_os(offload_str); if (!isa(A)) { auto S = A->getOffloadingKindPrefix(); if (!S.empty()) { offload_os << ", (" << S; if (A->getOffloadingArch()) offload_os << ", " << A->getOffloadingArch(); offload_os << ")"; } } auto getSelfIndent = [](int K) -> Twine { return (K == HeadSibAction) ? "+- " : (K == OtherSibAction) ? "|- " : ""; }; unsigned Id = Ids.size(); Ids[A] = Id; llvm::errs() << Indent + getSelfIndent(Kind) << Id << ": " << os.str() << ", " << types::getTypeName(A->getType()) << offload_os.str() << "\n"; return Id; } // Print the action graphs in a compilation C. // For example "clang -c file1.c file2.c" is composed of two subgraphs. void Driver::PrintActions(const Compilation &C) const { std::map Ids; for (Action *A : C.getActions()) PrintActions1(C, A, Ids); } /// Check whether the given input tree contains any compilation or /// assembly actions. static bool ContainsCompileOrAssembleAction(const Action *A) { if (isa(A) || isa(A) || isa(A)) return true; return llvm::any_of(A->inputs(), ContainsCompileOrAssembleAction); } void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC, const InputList &BAInputs) const { DerivedArgList &Args = C.getArgs(); ActionList &Actions = C.getActions(); llvm::PrettyStackTraceString CrashInfo("Building universal build actions"); // Collect the list of architectures. Duplicates are allowed, but should only // be handled once (in the order seen). llvm::StringSet<> ArchNames; SmallVector Archs; for (Arg *A : Args) { if (A->getOption().matches(options::OPT_arch)) { // Validate the option here; we don't save the type here because its // particular spelling may participate in other driver choices. 
llvm::Triple::ArchType Arch = tools::darwin::getArchTypeForMachOArchName(A->getValue()); if (Arch == llvm::Triple::UnknownArch) { Diag(clang::diag::err_drv_invalid_arch_name) << A->getAsString(Args); continue; } A->claim(); if (ArchNames.insert(A->getValue()).second) Archs.push_back(A->getValue()); } } // When there is no explicit arch for this platform, make sure we still bind // the architecture (to the default) so that -Xarch_ is handled correctly. if (!Archs.size()) Archs.push_back(Args.MakeArgString(TC.getDefaultUniversalArchName())); ActionList SingleActions; BuildActions(C, Args, BAInputs, SingleActions); // Add in arch bindings for every top level action, as well as lipo and // dsymutil steps if needed. for (Action* Act : SingleActions) { // Make sure we can lipo this kind of output. If not (and it is an actual // output) then we disallow, since we can't create an output file with the // right name without overwriting it. We could remove this oddity by just // changing the output names to include the arch, which would also fix // -save-temps. Compatibility wins for now. if (Archs.size() > 1 && !types::canLipoType(Act->getType())) Diag(clang::diag::err_drv_invalid_output_with_multiple_archs) << types::getTypeName(Act->getType()); ActionList Inputs; for (unsigned i = 0, e = Archs.size(); i != e; ++i) Inputs.push_back(C.MakeAction(Act, Archs[i])); // Lipo if necessary, we do it this way because we need to set the arch flag // so that -Xarch_ gets overwritten. if (Inputs.size() == 1 || Act->getType() == types::TY_Nothing) Actions.append(Inputs.begin(), Inputs.end()); else Actions.push_back(C.MakeAction(Inputs, Act->getType())); // Handle debug info queries. Arg *A = Args.getLastArg(options::OPT_g_Group); bool enablesDebugInfo = A && !A->getOption().matches(options::OPT_g0) && !A->getOption().matches(options::OPT_gstabs); if ((enablesDebugInfo || willEmitRemarks(Args)) && ContainsCompileOrAssembleAction(Actions.back())) { // Add a 'dsymutil' step if necessary, when debug info is enabled and we // have a compile input. We need to run 'dsymutil' ourselves in such cases // because the debug info will refer to a temporary object file which // will be removed at the end of the compilation process. if (Act->getType() == types::TY_Image) { ActionList Inputs; Inputs.push_back(Actions.back()); Actions.pop_back(); Actions.push_back( C.MakeAction(Inputs, types::TY_dSYM)); } // Verify the debug info output. if (Args.hasArg(options::OPT_verify_debug_info)) { Action* LastAction = Actions.back(); Actions.pop_back(); Actions.push_back(C.MakeAction( LastAction, types::TY_Nothing)); } } } } bool Driver::DiagnoseInputExistence(const DerivedArgList &Args, StringRef Value, types::ID Ty, bool TypoCorrect) const { if (!getCheckInputsExist()) return true; // stdin always exists. if (Value == "-") return true; // If it's a header to be found in the system or user search path, then defer // complaints about its absence until those searches can be done. When we // are definitely processing headers for C++20 header units, extend this to // allow the user to put "-fmodule-header -xc++-header vector" for example. if (Ty == types::TY_CXXSHeader || Ty == types::TY_CXXUHeader || (ModulesModeCXX20 && Ty == types::TY_CXXHeader)) return true; if (getVFS().exists(Value)) return true; if (TypoCorrect) { // Check if the filename is a typo for an option flag. OptTable thinks // that all args that are not known options and that start with / are // filenames, but e.g. 
`/diagnostic:caret` is more likely a typo for // the option `/diagnostics:caret` than a reference to a file in the root // directory. std::string Nearest; if (getOpts().findNearest(Value, Nearest, getOptionVisibilityMask()) <= 1) { Diag(clang::diag::err_drv_no_such_file_with_suggestion) << Value << Nearest; return false; } } // In CL mode, don't error on apparently non-existent linker inputs, because // they can be influenced by linker flags the clang driver might not // understand. // Examples: // - `clang-cl main.cc ole32.lib` in a non-MSVC shell will make the driver // module look for an MSVC installation in the registry. (We could ask // the MSVCToolChain object if it can find `ole32.lib`, but the logic to // look in the registry might move into lld-link in the future so that // lld-link invocations in non-MSVC shells just work too.) // - `clang-cl ... /link ...` can pass arbitrary flags to the linker, // including /libpath:, which is used to find .lib and .obj files. // So do not diagnose this on the driver level. Rely on the linker diagnosing // it. (If we don't end up invoking the linker, this means we'll emit a // "'linker' input unused [-Wunused-command-line-argument]" warning instead // of an error.) // // Only do this skip after the typo correction step above. `/Brepo` is treated // as TY_Object, but it's clearly a typo for `/Brepro`. It seems fine to emit // an error if we have a flag that's within an edit distance of 1 from a // flag. (Users can use `-Wl,` or `/linker` to launder the flag past the // driver in the unlikely case they run into this.) // // Don't do this for inputs that start with a '/', else we'd pass options // like /libpath: through to the linker silently. // // Emitting an error for linker inputs can also cause incorrect diagnostics // with the gcc driver. The command // clang -fuse-ld=lld -Wl,--chroot,some/dir /file.o // will make lld look for some/dir/file.o, while we will diagnose here that // `/file.o` does not exist. However, configure scripts check if // `clang /GR-` compiles without error to see if the compiler is cl.exe, // so we can't downgrade diagnostics for `/GR-` from an error to a warning // in cc mode. (We can in cl mode because cl.exe itself only warns on // unknown flags.) if (IsCLMode() && Ty == types::TY_Object && !Value.starts_with("/")) return true; Diag(clang::diag::err_drv_no_such_file) << Value; return false; } // Get the C++20 Header Unit type corresponding to the input type. static types::ID CXXHeaderUnitType(ModuleHeaderMode HM) { switch (HM) { case HeaderMode_User: return types::TY_CXXUHeader; case HeaderMode_System: return types::TY_CXXSHeader; case HeaderMode_Default: break; case HeaderMode_None: llvm_unreachable("should not be called in this case"); } return types::TY_CXXHUHeader; } // Construct a the list of inputs and their types. void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, InputList &Inputs) const { const llvm::opt::OptTable &Opts = getOpts(); // Track the current user specified (-x) input. We also explicitly track the // argument used to set the type; we only want to claim the type when we // actually use it, so we warn about unused -x arguments. types::ID InputType = types::TY_Nothing; Arg *InputTypeArg = nullptr; // The last /TC or /TP option sets the input type to C or C++ globally. if (Arg *TCTP = Args.getLastArgNoClaim(options::OPT__SLASH_TC, options::OPT__SLASH_TP)) { InputTypeArg = TCTP; InputType = TCTP->getOption().matches(options::OPT__SLASH_TC) ? 
types::TY_C : types::TY_CXX; Arg *Previous = nullptr; bool ShowNote = false; for (Arg *A : Args.filtered(options::OPT__SLASH_TC, options::OPT__SLASH_TP)) { if (Previous) { Diag(clang::diag::warn_drv_overriding_option) << Previous->getSpelling() << A->getSpelling(); ShowNote = true; } Previous = A; } if (ShowNote) Diag(clang::diag::note_drv_t_option_is_global); } // Warn -x after last input file has no effect { Arg *LastXArg = Args.getLastArgNoClaim(options::OPT_x); Arg *LastInputArg = Args.getLastArgNoClaim(options::OPT_INPUT); if (LastXArg && LastInputArg && LastInputArg->getIndex() < LastXArg->getIndex()) Diag(clang::diag::warn_drv_unused_x) << LastXArg->getValue(); } for (Arg *A : Args) { if (A->getOption().getKind() == Option::InputClass) { const char *Value = A->getValue(); types::ID Ty = types::TY_INVALID; // Infer the input type if necessary. if (InputType == types::TY_Nothing) { // If there was an explicit arg for this, claim it. if (InputTypeArg) InputTypeArg->claim(); // stdin must be handled specially. if (memcmp(Value, "-", 2) == 0) { if (IsFlangMode()) { Ty = types::TY_Fortran; } else if (IsDXCMode()) { Ty = types::TY_HLSL; } else { // If running with -E, treat as a C input (this changes the // builtin macros, for example). This may be overridden by -ObjC // below. // // Otherwise emit an error but still use a valid type to avoid // spurious errors (e.g., no inputs). assert(!CCGenDiagnostics && "stdin produces no crash reproducer"); if (!Args.hasArgNoClaim(options::OPT_E) && !CCCIsCPP()) Diag(IsCLMode() ? clang::diag::err_drv_unknown_stdin_type_clang_cl : clang::diag::err_drv_unknown_stdin_type); Ty = types::TY_C; } } else { // Otherwise lookup by extension. // Fallback is C if invoked as C preprocessor, C++ if invoked with // clang-cl /E, or Object otherwise. // We use a host hook here because Darwin at least has its own // idea of what .s is. if (const char *Ext = strrchr(Value, '.')) Ty = TC.LookupTypeForExtension(Ext + 1); if (Ty == types::TY_INVALID) { if (IsCLMode() && (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics)) Ty = types::TY_CXX; else if (CCCIsCPP() || CCGenDiagnostics) Ty = types::TY_C; else Ty = types::TY_Object; } // If the driver is invoked as C++ compiler (like clang++ or c++) it // should autodetect some input files as C++ for g++ compatibility. if (CCCIsCXX()) { types::ID OldTy = Ty; Ty = types::lookupCXXTypeForCType(Ty); // Do not complain about foo.h, when we are known to be processing // it as a C++20 header unit. if (Ty != OldTy && !(OldTy == types::TY_CHeader && hasHeaderMode())) Diag(clang::diag::warn_drv_treating_input_as_cxx) << getTypeName(OldTy) << getTypeName(Ty); } // If running with -fthinlto-index=, extensions that normally identify // native object files actually identify LLVM bitcode files. if (Args.hasArgNoClaim(options::OPT_fthinlto_index_EQ) && Ty == types::TY_Object) Ty = types::TY_LLVM_BC; } // -ObjC and -ObjC++ override the default language, but only for "source // files". We just treat everything that isn't a linker input as a // source file. // // FIXME: Clean this up if we move the phase sequence into the type. if (Ty != types::TY_Object) { if (Args.hasArg(options::OPT_ObjC)) Ty = types::TY_ObjC; else if (Args.hasArg(options::OPT_ObjCXX)) Ty = types::TY_ObjCXX; } // Disambiguate headers that are meant to be header units from those // intended to be PCH. 
Avoid missing '.h' cases that are counted as // C headers by default - we know we are in C++ mode and we do not // want to issue a complaint about compiling things in the wrong mode. if ((Ty == types::TY_CXXHeader || Ty == types::TY_CHeader) && hasHeaderMode()) Ty = CXXHeaderUnitType(CXX20HeaderType); } else { assert(InputTypeArg && "InputType set w/o InputTypeArg"); if (!InputTypeArg->getOption().matches(options::OPT_x)) { // If emulating cl.exe, make sure that /TC and /TP don't affect input // object files. const char *Ext = strrchr(Value, '.'); if (Ext && TC.LookupTypeForExtension(Ext + 1) == types::TY_Object) Ty = types::TY_Object; } if (Ty == types::TY_INVALID) { Ty = InputType; InputTypeArg->claim(); } } if ((Ty == types::TY_C || Ty == types::TY_CXX) && Args.hasArgNoClaim(options::OPT_hipstdpar)) Ty = types::TY_HIP; if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true)) Inputs.push_back(std::make_pair(Ty, A)); } else if (A->getOption().matches(options::OPT__SLASH_Tc)) { StringRef Value = A->getValue(); if (DiagnoseInputExistence(Args, Value, types::TY_C, /*TypoCorrect=*/false)) { Arg *InputArg = MakeInputArg(Args, Opts, A->getValue()); Inputs.push_back(std::make_pair(types::TY_C, InputArg)); } A->claim(); } else if (A->getOption().matches(options::OPT__SLASH_Tp)) { StringRef Value = A->getValue(); if (DiagnoseInputExistence(Args, Value, types::TY_CXX, /*TypoCorrect=*/false)) { Arg *InputArg = MakeInputArg(Args, Opts, A->getValue()); Inputs.push_back(std::make_pair(types::TY_CXX, InputArg)); } A->claim(); } else if (A->getOption().hasFlag(options::LinkerInput)) { // Just treat as object type, we could make a special type for this if // necessary. Inputs.push_back(std::make_pair(types::TY_Object, A)); } else if (A->getOption().matches(options::OPT_x)) { InputTypeArg = A; InputType = types::lookupTypeForTypeSpecifier(A->getValue()); A->claim(); // Follow gcc behavior and treat as linker input for invalid -x // options. Its not clear why we shouldn't just revert to unknown; but // this isn't very important, we might as well be bug compatible. if (!InputType) { Diag(clang::diag::err_drv_unknown_language) << A->getValue(); InputType = types::TY_Object; } // If the user has put -fmodule-header{,=} then we treat C++ headers as // header unit inputs. So we 'promote' -xc++-header appropriately. if (InputType == types::TY_CXXHeader && hasHeaderMode()) InputType = CXXHeaderUnitType(CXX20HeaderType); } else if (A->getOption().getID() == options::OPT_U) { assert(A->getNumValues() == 1 && "The /U option has one value."); StringRef Val = A->getValue(0); if (Val.find_first_of("/\\") != StringRef::npos) { // Warn about e.g. "/Users/me/myfile.c". Diag(diag::warn_slash_u_filename) << Val; Diag(diag::note_use_dashdash); } } } if (CCCIsCPP() && Inputs.empty()) { // If called as standalone preprocessor, stdin is processed // if no other input is present. Arg *A = MakeInputArg(Args, Opts, "-"); Inputs.push_back(std::make_pair(types::TY_C, A)); } } namespace { /// Provides a convenient interface for different programming models to generate /// the required device actions. class OffloadingActionBuilder final { /// Flag used to trace errors in the builder. bool IsValid = false; /// The compilation that is using this builder. Compilation &C; /// Map between an input argument and the offload kinds used to process it. std::map InputArgToOffloadKindMap; /// Map between a host action and its originating input argument. std::map HostActionToInputArgMap; /// Builder interface. 
It doesn't build anything or keep any state. class DeviceActionBuilder { public: typedef const llvm::SmallVectorImpl PhasesTy; enum ActionBuilderReturnCode { // The builder acted successfully on the current action. ABRT_Success, // The builder didn't have to act on the current action. ABRT_Inactive, // The builder was successful and requested the host action to not be // generated. ABRT_Ignore_Host, }; protected: /// Compilation associated with this builder. Compilation &C; /// Tool chains associated with this builder. The same programming /// model may have associated one or more tool chains. SmallVector ToolChains; /// The derived arguments associated with this builder. DerivedArgList &Args; /// The inputs associated with this builder. const Driver::InputList &Inputs; /// The associated offload kind. Action::OffloadKind AssociatedOffloadKind = Action::OFK_None; public: DeviceActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs, Action::OffloadKind AssociatedOffloadKind) : C(C), Args(Args), Inputs(Inputs), AssociatedOffloadKind(AssociatedOffloadKind) {} virtual ~DeviceActionBuilder() {} /// Fill up the array \a DA with all the device dependences that should be /// added to the provided host action \a HostAction. By default it is /// inactive. virtual ActionBuilderReturnCode getDeviceDependences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, phases::ID FinalPhase, PhasesTy &Phases) { return ABRT_Inactive; } /// Update the state to include the provided host action \a HostAction as a /// dependency of the current device action. By default it is inactive. virtual ActionBuilderReturnCode addDeviceDependences(Action *HostAction) { return ABRT_Inactive; } /// Append top level actions generated by the builder. virtual void appendTopLevelActions(ActionList &AL) {} /// Append linker device actions generated by the builder. virtual void appendLinkDeviceActions(ActionList &AL) {} /// Append linker host action generated by the builder. virtual Action* appendLinkHostActions(ActionList &AL) { return nullptr; } /// Append linker actions generated by the builder. virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {} /// Initialize the builder. Return true if any initialization errors are /// found. virtual bool initialize() { return false; } /// Return true if the builder can use bundling/unbundling. virtual bool canUseBundlerUnbundler() const { return false; } /// Return true if this builder is valid. We have a valid builder if we have /// associated device tool chains. bool isValid() { return !ToolChains.empty(); } /// Return the associated offload kind. Action::OffloadKind getAssociatedOffloadKind() { return AssociatedOffloadKind; } }; /// Base class for CUDA/HIP action builder. It injects device code in /// the host backend action. class CudaActionBuilderBase : public DeviceActionBuilder { protected: /// Flags to signal if the user requested host-only or device-only /// compilation. bool CompileHostOnly = false; bool CompileDeviceOnly = false; bool EmitLLVM = false; bool EmitAsm = false; /// ID to identify each device compilation. For CUDA it is simply the /// GPU arch string. For HIP it is either the GPU arch string or GPU /// arch string plus feature strings delimited by a plus sign, e.g. /// gfx906+xnack. struct TargetID { /// Target ID string which is persistent throughout the compilation. 
const char *ID; TargetID(OffloadArch Arch) { ID = OffloadArchToString(Arch); } TargetID(const char *ID) : ID(ID) {} operator const char *() { return ID; } operator StringRef() { return StringRef(ID); } }; /// List of GPU architectures to use in this compilation. SmallVector GpuArchList; /// The CUDA actions for the current input. ActionList CudaDeviceActions; /// The CUDA fat binary if it was generated for the current input. Action *CudaFatBinary = nullptr; /// Flag that is set to true if this builder acted on the current input. bool IsActive = false; /// Flag for -fgpu-rdc. bool Relocatable = false; /// Default GPU architecture if there's no one specified. OffloadArch DefaultOffloadArch = OffloadArch::UNKNOWN; /// Method to generate compilation unit ID specified by option /// '-fuse-cuid='. enum UseCUIDKind { CUID_Hash, CUID_Random, CUID_None, CUID_Invalid }; UseCUIDKind UseCUID = CUID_Hash; /// Compilation unit ID specified by option '-cuid='. StringRef FixedCUID; public: CudaActionBuilderBase(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs, Action::OffloadKind OFKind) : DeviceActionBuilder(C, Args, Inputs, OFKind) { CompileDeviceOnly = C.getDriver().offloadDeviceOnly(); Relocatable = Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, /*Default=*/false); } ActionBuilderReturnCode addDeviceDependences(Action *HostAction) override { // While generating code for CUDA, we only depend on the host input action // to trigger the creation of all the CUDA device actions. // If we are dealing with an input action, replicate it for each GPU // architecture. If we are in host-only mode we return 'success' so that // the host uses the CUDA offload kind. if (auto *IA = dyn_cast(HostAction)) { assert(!GpuArchList.empty() && "We should have at least one GPU architecture."); // If the host input is not CUDA or HIP, we don't need to bother about // this input. if (!(IA->getType() == types::TY_CUDA || IA->getType() == types::TY_HIP || IA->getType() == types::TY_PP_HIP)) { // The builder will ignore this input. IsActive = false; return ABRT_Inactive; } // Set the flag to true, so that the builder acts on the current input. IsActive = true; if (CompileHostOnly) return ABRT_Success; // Replicate inputs for each GPU architecture. auto Ty = IA->getType() == types::TY_HIP ? types::TY_HIP_DEVICE : types::TY_CUDA_DEVICE; std::string CUID = FixedCUID.str(); if (CUID.empty()) { if (UseCUID == CUID_Random) CUID = llvm::utohexstr(llvm::sys::Process::GetRandomNumber(), /*LowerCase=*/true); else if (UseCUID == CUID_Hash) { llvm::MD5 Hasher; llvm::MD5::MD5Result Hash; SmallString<256> RealPath; llvm::sys::fs::real_path(IA->getInputArg().getValue(), RealPath, /*expand_tilde=*/true); Hasher.update(RealPath); for (auto *A : Args) { if (A->getOption().matches(options::OPT_INPUT)) continue; Hasher.update(A->getAsString(Args)); } Hasher.final(Hash); CUID = llvm::utohexstr(Hash.low(), /*LowerCase=*/true); } } IA->setId(CUID); for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { CudaDeviceActions.push_back( C.MakeAction(IA->getInputArg(), Ty, IA->getId())); } return ABRT_Success; } // If this is an unbundling action use it as is for each CUDA toolchain. if (auto *UA = dyn_cast(HostAction)) { // If -fgpu-rdc is disabled, should not unbundle since there is no // device code to link. 
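        // Illustrative note (editorial, not from upstream): without -fgpu-rdc
        // each TU's device code is already linked and embedded at compile
        // time, so a host object passed on the link line carries no separate
        // device objects for the unbundling step below to extract.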
        if (UA->getType() == types::TY_Object && !Relocatable)
          return ABRT_Inactive;

        CudaDeviceActions.clear();
        auto *IA = cast<InputAction>(UA->getInputs().back());
        std::string FileName = IA->getInputArg().getAsString(Args);
        // Check if the type of the file is the same as the action. Do not
        // unbundle it if it is not. Do not unbundle .so files, for example,
        // which are not object files. Files with extension ".lib" are
        // classified as TY_Object but they are actually archives, therefore
        // should not be unbundled here as objects. They will be handled at
        // other places.
        const StringRef LibFileExt = ".lib";
        if (IA->getType() == types::TY_Object &&
            (!llvm::sys::path::has_extension(FileName) ||
             types::lookupTypeForExtension(
                 llvm::sys::path::extension(FileName).drop_front()) !=
                 types::TY_Object ||
             llvm::sys::path::extension(FileName) == LibFileExt))
          return ABRT_Inactive;

        for (auto Arch : GpuArchList) {
          CudaDeviceActions.push_back(UA);
          UA->registerDependentActionInfo(ToolChains[0], Arch,
                                          AssociatedOffloadKind);
        }
        IsActive = true;
        return ABRT_Success;
      }

      return IsActive ? ABRT_Success : ABRT_Inactive;
    }

    void appendTopLevelActions(ActionList &AL) override {
      // Utility to append actions to the top level list.
      auto AddTopLevel = [&](Action *A, TargetID TargetID) {
        OffloadAction::DeviceDependences Dep;
        Dep.add(*A, *ToolChains.front(), TargetID, AssociatedOffloadKind);
        AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
      };

      // If we have a fat binary, add it to the list.
      if (CudaFatBinary) {
        AddTopLevel(CudaFatBinary, OffloadArch::UNUSED);
        CudaDeviceActions.clear();
        CudaFatBinary = nullptr;
        return;
      }

      if (CudaDeviceActions.empty())
        return;

      // If we have CUDA actions at this point, that's because we have a
      // partial compilation, so we should have an action for each GPU
      // architecture.
      assert(CudaDeviceActions.size() == GpuArchList.size() &&
             "Expecting one action per GPU architecture.");
      assert(ToolChains.size() == 1 &&
             "Expecting to have a single CUDA toolchain.");
      for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I)
        AddTopLevel(CudaDeviceActions[I], GpuArchList[I]);
      CudaDeviceActions.clear();
    }

    /// Get canonicalized offload arch option. \returns empty StringRef if the
    /// option is invalid.
    virtual StringRef getCanonicalOffloadArch(StringRef Arch) = 0;

    virtual std::optional<std::pair<llvm::StringRef, llvm::StringRef>>
    getConflictOffloadArchCombination(const std::set<StringRef> &GpuArchs) = 0;

    bool initialize() override {
      assert(AssociatedOffloadKind == Action::OFK_Cuda ||
             AssociatedOffloadKind == Action::OFK_HIP);

      // We don't need to support CUDA.
      if (AssociatedOffloadKind == Action::OFK_Cuda &&
          !C.hasOffloadToolChain<Action::OFK_Cuda>())
        return false;

      // We don't need to support HIP.
      if (AssociatedOffloadKind == Action::OFK_HIP &&
          !C.hasOffloadToolChain<Action::OFK_HIP>())
        return false;

      const ToolChain *HostTC =
          C.getSingleOffloadToolChain<Action::OFK_Host>();
      assert(HostTC && "No toolchain for host compilation.");
      if (HostTC->getTriple().isNVPTX() ||
          HostTC->getTriple().getArch() == llvm::Triple::amdgcn) {
        // We do not support targeting NVPTX/AMDGCN for host compilation. Throw
        // an error and abort pipeline construction early so we don't trip
        // asserts that assume device-side compilation.
        C.getDriver().Diag(diag::err_drv_cuda_host_arch)
            << HostTC->getTriple().getArchName();
        return true;
      }

      ToolChains.push_back(
          AssociatedOffloadKind == Action::OFK_Cuda ?
C.getSingleOffloadToolChain() : C.getSingleOffloadToolChain()); CompileHostOnly = C.getDriver().offloadHostOnly(); EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); EmitAsm = Args.getLastArg(options::OPT_S); FixedCUID = Args.getLastArgValue(options::OPT_cuid_EQ); if (Arg *A = Args.getLastArg(options::OPT_fuse_cuid_EQ)) { StringRef UseCUIDStr = A->getValue(); UseCUID = llvm::StringSwitch(UseCUIDStr) .Case("hash", CUID_Hash) .Case("random", CUID_Random) .Case("none", CUID_None) .Default(CUID_Invalid); if (UseCUID == CUID_Invalid) { C.getDriver().Diag(diag::err_drv_invalid_value) << A->getAsString(Args) << UseCUIDStr; C.setContainsError(); return true; } } // --offload and --offload-arch options are mutually exclusive. if (Args.hasArgNoClaim(options::OPT_offload_EQ) && Args.hasArgNoClaim(options::OPT_offload_arch_EQ, options::OPT_no_offload_arch_EQ)) { C.getDriver().Diag(diag::err_opt_not_valid_with_opt) << "--offload-arch" << "--offload"; } // Collect all offload arch parameters, removing duplicates. std::set GpuArchs; bool Error = false; for (Arg *A : Args) { if (!(A->getOption().matches(options::OPT_offload_arch_EQ) || A->getOption().matches(options::OPT_no_offload_arch_EQ))) continue; A->claim(); for (StringRef ArchStr : llvm::split(A->getValue(), ",")) { if (A->getOption().matches(options::OPT_no_offload_arch_EQ) && ArchStr == "all") { GpuArchs.clear(); } else if (ArchStr == "native") { const ToolChain &TC = *ToolChains.front(); auto GPUsOrErr = ToolChains.front()->getSystemGPUArchs(Args); if (!GPUsOrErr) { TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch) << llvm::Triple::getArchTypeName(TC.getArch()) << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; continue; } for (auto GPU : *GPUsOrErr) { GpuArchs.insert(Args.MakeArgString(GPU)); } } else { ArchStr = getCanonicalOffloadArch(ArchStr); if (ArchStr.empty()) { Error = true; } else if (A->getOption().matches(options::OPT_offload_arch_EQ)) GpuArchs.insert(ArchStr); else if (A->getOption().matches(options::OPT_no_offload_arch_EQ)) GpuArchs.erase(ArchStr); else llvm_unreachable("Unexpected option."); } } } auto &&ConflictingArchs = getConflictOffloadArchCombination(GpuArchs); if (ConflictingArchs) { C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) << ConflictingArchs->first << ConflictingArchs->second; C.setContainsError(); return true; } // Collect list of GPUs remaining in the set. for (auto Arch : GpuArchs) GpuArchList.push_back(Arch.data()); // Default to sm_20 which is the lowest common denominator for // supported GPUs. sm_20 code should work correctly, if // suboptimally, on all newer GPUs. if (GpuArchList.empty()) { if (ToolChains.front()->getTriple().isSPIRV()) { if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD) GpuArchList.push_back(OffloadArch::AMDGCNSPIRV); else GpuArchList.push_back(OffloadArch::Generic); } else { GpuArchList.push_back(DefaultOffloadArch); } } return Error; } }; /// \brief CUDA action builder. It injects device code in the host backend /// action. 
class CudaActionBuilder final : public CudaActionBuilderBase { public: CudaActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs) : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_Cuda) { DefaultOffloadArch = OffloadArch::CudaDefault; } StringRef getCanonicalOffloadArch(StringRef ArchStr) override { OffloadArch Arch = StringToOffloadArch(ArchStr); if (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch)) { C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; return StringRef(); } return OffloadArchToString(Arch); } std::optional> getConflictOffloadArchCombination( const std::set &GpuArchs) override { return std::nullopt; } ActionBuilderReturnCode getDeviceDependences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, phases::ID FinalPhase, PhasesTy &Phases) override { if (!IsActive) return ABRT_Inactive; // If we don't have more CUDA actions, we don't have any dependences to // create for the host. if (CudaDeviceActions.empty()) return ABRT_Success; assert(CudaDeviceActions.size() == GpuArchList.size() && "Expecting one action per GPU architecture."); assert(!CompileHostOnly && "Not expecting CUDA actions in host-only compilation."); // If we are generating code for the device or we are in a backend phase, // we attempt to generate the fat binary. We compile each arch to ptx and // assemble to cubin, then feed the cubin *and* the ptx into a device // "link" action, which uses fatbinary to combine these cubins into one // fatbin. The fatbin is then an input to the host action if not in // device-only mode. if (CompileDeviceOnly || CurPhase == phases::Backend) { ActionList DeviceActions; for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { // Produce the device action from the current phase up to the assemble // phase. for (auto Ph : Phases) { // Skip the phases that were already dealt with. if (Ph < CurPhase) continue; // We have to be consistent with the host final phase. if (Ph > FinalPhase) break; CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction( C, Args, Ph, CudaDeviceActions[I], Action::OFK_Cuda); if (Ph == phases::Assemble) break; } // If we didn't reach the assemble phase, we can't generate the fat // binary. We don't need to generate the fat binary if we are not in // device-only mode. if (!isa(CudaDeviceActions[I]) || CompileDeviceOnly) continue; Action *AssembleAction = CudaDeviceActions[I]; assert(AssembleAction->getType() == types::TY_Object); assert(AssembleAction->getInputs().size() == 1); Action *BackendAction = AssembleAction->getInputs()[0]; assert(BackendAction->getType() == types::TY_PP_Asm); for (auto &A : {AssembleAction, BackendAction}) { OffloadAction::DeviceDependences DDep; DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda); DeviceActions.push_back( C.MakeAction(DDep, A->getType())); } } // We generate the fat binary if we have device input actions. if (!DeviceActions.empty()) { CudaFatBinary = C.MakeAction(DeviceActions, types::TY_CUDA_FATBIN); if (!CompileDeviceOnly) { DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr, Action::OFK_Cuda); // Clear the fat binary, it is already a dependence to an host // action. CudaFatBinary = nullptr; } // Remove the CUDA actions as they are already connected to an host // action or fat binary. CudaDeviceActions.clear(); } // We avoid creating host action in device-only mode. return CompileDeviceOnly ? 
ABRT_Ignore_Host : ABRT_Success; } else if (CurPhase > phases::Backend) { // If we are past the backend phase and still have a device action, we // don't have to do anything as this action is already a device // top-level action. return ABRT_Success; } assert(CurPhase < phases::Backend && "Generating single CUDA " "instructions should only occur " "before the backend phase!"); // By default, we produce an action for each device arch. for (Action *&A : CudaDeviceActions) A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A); return ABRT_Success; } }; /// \brief HIP action builder. It injects device code in the host backend /// action. class HIPActionBuilder final : public CudaActionBuilderBase { /// The linker inputs obtained for each device arch. SmallVector DeviceLinkerInputs; // The default bundling behavior depends on the type of output, therefore // BundleOutput needs to be tri-value: None, true, or false. // Bundle code objects except --no-gpu-output is specified for device // only compilation. Bundle other type of output files only if // --gpu-bundle-output is specified for device only compilation. std::optional BundleOutput; std::optional EmitReloc; public: HIPActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs) : CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) { DefaultOffloadArch = OffloadArch::HIPDefault; if (Args.hasArg(options::OPT_fhip_emit_relocatable, options::OPT_fno_hip_emit_relocatable)) { EmitReloc = Args.hasFlag(options::OPT_fhip_emit_relocatable, options::OPT_fno_hip_emit_relocatable, false); if (*EmitReloc) { if (Relocatable) { C.getDriver().Diag(diag::err_opt_not_valid_with_opt) << "-fhip-emit-relocatable" << "-fgpu-rdc"; } if (!CompileDeviceOnly) { C.getDriver().Diag(diag::err_opt_not_valid_without_opt) << "-fhip-emit-relocatable" << "--cuda-device-only"; } } } if (Args.hasArg(options::OPT_gpu_bundle_output, options::OPT_no_gpu_bundle_output)) BundleOutput = Args.hasFlag(options::OPT_gpu_bundle_output, options::OPT_no_gpu_bundle_output, true) && (!EmitReloc || !*EmitReloc); } bool canUseBundlerUnbundler() const override { return true; } StringRef getCanonicalOffloadArch(StringRef IdStr) override { llvm::StringMap Features; // getHIPOffloadTargetTriple() is known to return valid value as it has // been called successfully in the CreateOffloadingDeviceToolChains(). auto ArchStr = parseTargetID( *getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()), IdStr, &Features); if (!ArchStr) { C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << IdStr; C.setContainsError(); return StringRef(); } auto CanId = getCanonicalTargetID(*ArchStr, Features); return Args.MakeArgStringRef(CanId); }; std::optional> getConflictOffloadArchCombination( const std::set &GpuArchs) override { return getConflictTargetIDCombination(GpuArchs); } ActionBuilderReturnCode getDeviceDependences(OffloadAction::DeviceDependences &DA, phases::ID CurPhase, phases::ID FinalPhase, PhasesTy &Phases) override { if (!IsActive) return ABRT_Inactive; // amdgcn does not support linking of object files, therefore we skip // backend and assemble phases to output LLVM IR. Except for generating // non-relocatable device code, where we generate fat binary for device // code and pass to host in Backend phase. 
if (CudaDeviceActions.empty()) return ABRT_Success; assert(((CurPhase == phases::Link && Relocatable) || CudaDeviceActions.size() == GpuArchList.size()) && "Expecting one action per GPU architecture."); assert(!CompileHostOnly && "Not expecting HIP actions in host-only compilation."); bool ShouldLink = !EmitReloc || !*EmitReloc; if (!Relocatable && CurPhase == phases::Backend && !EmitLLVM && !EmitAsm && ShouldLink) { // If we are in backend phase, we attempt to generate the fat binary. // We compile each arch to IR and use a link action to generate code // object containing ISA. Then we use a special "link" action to create // a fat binary containing all the code objects for different GPU's. // The fat binary is then an input to the host action. for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { if (C.getDriver().isUsingLTO(/*IsOffload=*/true)) { // When LTO is enabled, skip the backend and assemble phases and // use lld to link the bitcode. ActionList AL; AL.push_back(CudaDeviceActions[I]); // Create a link action to link device IR with device library // and generate ISA. CudaDeviceActions[I] = C.MakeAction(AL, types::TY_Image); } else { // When LTO is not enabled, we follow the conventional // compiler phases, including backend and assemble phases. ActionList AL; Action *BackendAction = nullptr; if (ToolChains.front()->getTriple().isSPIRV()) { // Emit LLVM bitcode for SPIR-V targets. SPIR-V device tool chain // (HIPSPVToolChain) runs post-link LLVM IR passes. types::ID Output = Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC; BackendAction = C.MakeAction(CudaDeviceActions[I], Output); } else BackendAction = C.getDriver().ConstructPhaseAction( C, Args, phases::Backend, CudaDeviceActions[I], AssociatedOffloadKind); auto AssembleAction = C.getDriver().ConstructPhaseAction( C, Args, phases::Assemble, BackendAction, AssociatedOffloadKind); AL.push_back(AssembleAction); // Create a link action to link device IR with device library // and generate ISA. CudaDeviceActions[I] = C.MakeAction(AL, types::TY_Image); } // OffloadingActionBuilder propagates device arch until an offload // action. Since the next action for creating fatbin does // not have device arch, whereas the above link action and its input // have device arch, an offload action is needed to stop the null // device arch of the next action being propagated to the above link // action. OffloadAction::DeviceDependences DDep; DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], AssociatedOffloadKind); CudaDeviceActions[I] = C.MakeAction( DDep, CudaDeviceActions[I]->getType()); } if (!CompileDeviceOnly || !BundleOutput || *BundleOutput) { // Create HIP fat binary with a special "link" action. CudaFatBinary = C.MakeAction(CudaDeviceActions, types::TY_HIP_FATBIN); if (!CompileDeviceOnly) { DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr, AssociatedOffloadKind); // Clear the fat binary, it is already a dependence to an host // action. CudaFatBinary = nullptr; } // Remove the CUDA actions as they are already connected to an host // action or fat binary. CudaDeviceActions.clear(); } return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success; } else if (CurPhase == phases::Link) { if (!ShouldLink) return ABRT_Success; // Save CudaDeviceActions to DeviceLinkerInputs for each GPU subarch. // This happens to each device action originated from each input file. 
// Later on, device actions in DeviceLinkerInputs are used to create // device link actions in appendLinkDependences and the created device // link actions are passed to the offload action as device dependence. DeviceLinkerInputs.resize(CudaDeviceActions.size()); auto LI = DeviceLinkerInputs.begin(); for (auto *A : CudaDeviceActions) { LI->push_back(A); ++LI; } // We will pass the device action as a host dependence, so we don't // need to do anything else with them. CudaDeviceActions.clear(); return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success; } // By default, we produce an action for each device arch. for (Action *&A : CudaDeviceActions) A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A, AssociatedOffloadKind); if (CompileDeviceOnly && CurPhase == FinalPhase && BundleOutput && *BundleOutput) { for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { OffloadAction::DeviceDependences DDep; DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], AssociatedOffloadKind); CudaDeviceActions[I] = C.MakeAction( DDep, CudaDeviceActions[I]->getType()); } CudaFatBinary = C.MakeAction(CudaDeviceActions); CudaDeviceActions.clear(); } return (CompileDeviceOnly && (CurPhase == FinalPhase || (!ShouldLink && CurPhase == phases::Assemble))) ? ABRT_Ignore_Host : ABRT_Success; } void appendLinkDeviceActions(ActionList &AL) override { if (DeviceLinkerInputs.size() == 0) return; assert(DeviceLinkerInputs.size() == GpuArchList.size() && "Linker inputs and GPU arch list sizes do not match."); ActionList Actions; unsigned I = 0; // Append a new link action for each device. // Each entry in DeviceLinkerInputs corresponds to a GPU arch. for (auto &LI : DeviceLinkerInputs) { types::ID Output = Args.hasArg(options::OPT_emit_llvm) ? types::TY_LLVM_BC : types::TY_Image; auto *DeviceLinkAction = C.MakeAction(LI, Output); // Linking all inputs for the current GPU arch. // LI contains all the inputs for the linker. OffloadAction::DeviceDependences DeviceLinkDeps; DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], GpuArchList[I], AssociatedOffloadKind); Actions.push_back(C.MakeAction( DeviceLinkDeps, DeviceLinkAction->getType())); ++I; } DeviceLinkerInputs.clear(); // If emitting LLVM, do not generate final host/device compilation action if (Args.hasArg(options::OPT_emit_llvm)) { AL.append(Actions); return; } // Create a host object from all the device images by embedding them // in a fat binary for mixed host-device compilation. For device-only // compilation, creates a fat binary. OffloadAction::DeviceDependences DDeps; if (!CompileDeviceOnly || !BundleOutput || *BundleOutput) { auto *TopDeviceLinkAction = C.MakeAction( Actions, CompileDeviceOnly ? types::TY_HIP_FATBIN : types::TY_Object); DDeps.add(*TopDeviceLinkAction, *ToolChains[0], nullptr, AssociatedOffloadKind); // Offload the host object to the host linker. AL.push_back( C.MakeAction(DDeps, TopDeviceLinkAction->getType())); } else { AL.append(Actions); } } Action* appendLinkHostActions(ActionList &AL) override { return AL.back(); } void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {} }; /// /// TODO: Add the implementation for other specialized builders here. /// /// Specialized builders being used by this offloading action builder. SmallVector SpecializedBuilders; /// Flag set to true if all valid builders allow file bundling/unbundling. 
bool CanUseBundler; public: OffloadingActionBuilder(Compilation &C, DerivedArgList &Args, const Driver::InputList &Inputs) : C(C) { // Create a specialized builder for each device toolchain. IsValid = true; // Create a specialized builder for CUDA. SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs)); // Create a specialized builder for HIP. SpecializedBuilders.push_back(new HIPActionBuilder(C, Args, Inputs)); // // TODO: Build other specialized builders here. // // Initialize all the builders, keeping track of errors. If all valid // builders agree that we can use bundling, set the flag to true. unsigned ValidBuilders = 0u; unsigned ValidBuildersSupportingBundling = 0u; for (auto *SB : SpecializedBuilders) { IsValid = IsValid && !SB->initialize(); // Update the counters if the builder is valid. if (SB->isValid()) { ++ValidBuilders; if (SB->canUseBundlerUnbundler()) ++ValidBuildersSupportingBundling; } } CanUseBundler = ValidBuilders && ValidBuilders == ValidBuildersSupportingBundling; } ~OffloadingActionBuilder() { for (auto *SB : SpecializedBuilders) delete SB; } /// Record a host action and its originating input argument. void recordHostAction(Action *HostAction, const Arg *InputArg) { assert(HostAction && "Invalid host action"); assert(InputArg && "Invalid input argument"); auto Loc = HostActionToInputArgMap.find(HostAction); if (Loc == HostActionToInputArgMap.end()) HostActionToInputArgMap[HostAction] = InputArg; assert(HostActionToInputArgMap[HostAction] == InputArg && "host action mapped to multiple input arguments"); } /// Generate an action that adds device dependences (if any) to a host action. /// If no device dependence actions exist, just return the host action \a /// HostAction. If an error is found or if no builder requires the host action /// to be generated, return nullptr. Action * addDeviceDependencesToHostAction(Action *HostAction, const Arg *InputArg, phases::ID CurPhase, phases::ID FinalPhase, DeviceActionBuilder::PhasesTy &Phases) { if (!IsValid) return nullptr; if (SpecializedBuilders.empty()) return HostAction; assert(HostAction && "Invalid host action!"); recordHostAction(HostAction, InputArg); OffloadAction::DeviceDependences DDeps; // Check if all the programming models agree we should not emit the host // action. Also, keep track of the offloading kinds employed. auto &OffloadKind = InputArgToOffloadKindMap[InputArg]; unsigned InactiveBuilders = 0u; unsigned IgnoringBuilders = 0u; for (auto *SB : SpecializedBuilders) { if (!SB->isValid()) { ++InactiveBuilders; continue; } auto RetCode = SB->getDeviceDependences(DDeps, CurPhase, FinalPhase, Phases); // If the builder explicitly says the host action should be ignored, // we need to increment the variable that tracks the builders that request // the host object to be ignored. if (RetCode == DeviceActionBuilder::ABRT_Ignore_Host) ++IgnoringBuilders; // Unless the builder was inactive for this action, we have to record the // offload kind because the host will have to use it. if (RetCode != DeviceActionBuilder::ABRT_Inactive) OffloadKind |= SB->getAssociatedOffloadKind(); } // If all builders agree that the host object should be ignored, just return // nullptr. if (IgnoringBuilders && SpecializedBuilders.size() == (InactiveBuilders + IgnoringBuilders)) return nullptr; if (DDeps.getActions().empty()) return HostAction; // We have dependences we need to bundle together. We use an offload action // for that. 
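    // Illustrative example (editorial, not from upstream): a single CUDA
    // source compiled with two --offload-arch values reaches this point with
    // one device dependence per architecture, all of which are attached to
    // the host compile by the OffloadAction created below.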
OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), /*BoundArch=*/nullptr, DDeps); return C.MakeAction(HDep, DDeps); } /// Generate an action that adds a host dependence to a device action. The /// results will be kept in this action builder. Return true if an error was /// found. bool addHostDependenceToDeviceActions(Action *&HostAction, const Arg *InputArg) { if (!IsValid) return true; recordHostAction(HostAction, InputArg); // If we are supporting bundling/unbundling and the current action is an // input action of non-source file, we replace the host action by the // unbundling action. The bundler tool has the logic to detect if an input // is a bundle or not and if the input is not a bundle it assumes it is a // host file. Therefore it is safe to create an unbundling action even if // the input is not a bundle. if (CanUseBundler && isa(HostAction) && InputArg->getOption().getKind() == llvm::opt::Option::InputClass && (!types::isSrcFile(HostAction->getType()) || HostAction->getType() == types::TY_PP_HIP)) { auto UnbundlingHostAction = C.MakeAction(HostAction); UnbundlingHostAction->registerDependentActionInfo( C.getSingleOffloadToolChain(), /*BoundArch=*/StringRef(), Action::OFK_Host); HostAction = UnbundlingHostAction; recordHostAction(HostAction, InputArg); } assert(HostAction && "Invalid host action!"); // Register the offload kinds that are used. auto &OffloadKind = InputArgToOffloadKindMap[InputArg]; for (auto *SB : SpecializedBuilders) { if (!SB->isValid()) continue; auto RetCode = SB->addDeviceDependences(HostAction); // Host dependences for device actions are not compatible with that same // action being ignored. assert(RetCode != DeviceActionBuilder::ABRT_Ignore_Host && "Host dependence not expected to be ignored.!"); // Unless the builder was inactive for this action, we have to record the // offload kind because the host will have to use it. if (RetCode != DeviceActionBuilder::ABRT_Inactive) OffloadKind |= SB->getAssociatedOffloadKind(); } // Do not use unbundler if the Host does not depend on device action. if (OffloadKind == Action::OFK_None && CanUseBundler) if (auto *UA = dyn_cast(HostAction)) HostAction = UA->getInputs().back(); return false; } /// Add the offloading top level actions to the provided action list. This /// function can replace the host action by a bundling action if the /// programming models allow it. bool appendTopLevelActions(ActionList &AL, Action *HostAction, const Arg *InputArg) { if (HostAction) recordHostAction(HostAction, InputArg); // Get the device actions to be appended. ActionList OffloadAL; for (auto *SB : SpecializedBuilders) { if (!SB->isValid()) continue; SB->appendTopLevelActions(OffloadAL); } // If we can use the bundler, replace the host action by the bundling one in // the resulting list. Otherwise, just append the device actions. For // device only compilation, HostAction is a null pointer, therefore only do // this when HostAction is not a null pointer. if (CanUseBundler && HostAction && HostAction->getType() != types::TY_Nothing && !OffloadAL.empty()) { // Add the host action to the list in order to create the bundling action. OffloadAL.push_back(HostAction); // We expect that the host action was just appended to the action list // before this method was called. 
assert(HostAction == AL.back() && "Host action not in the list??"); HostAction = C.MakeAction(OffloadAL); recordHostAction(HostAction, InputArg); AL.back() = HostAction; } else AL.append(OffloadAL.begin(), OffloadAL.end()); // Propagate to the current host action (if any) the offload information // associated with the current input. if (HostAction) HostAction->propagateHostOffloadInfo(InputArgToOffloadKindMap[InputArg], /*BoundArch=*/nullptr); return false; } void appendDeviceLinkActions(ActionList &AL) { for (DeviceActionBuilder *SB : SpecializedBuilders) { if (!SB->isValid()) continue; SB->appendLinkDeviceActions(AL); } } Action *makeHostLinkAction() { // Build a list of device linking actions. ActionList DeviceAL; appendDeviceLinkActions(DeviceAL); if (DeviceAL.empty()) return nullptr; // Let builders add host linking actions. Action* HA = nullptr; for (DeviceActionBuilder *SB : SpecializedBuilders) { if (!SB->isValid()) continue; HA = SB->appendLinkHostActions(DeviceAL); // This created host action has no originating input argument, therefore // needs to set its offloading kind directly. if (HA) HA->propagateHostOffloadInfo(SB->getAssociatedOffloadKind(), /*BoundArch=*/nullptr); } return HA; } /// Processes the host linker action. This currently consists of replacing it /// with an offload action if there are device link objects and propagate to /// the host action all the offload kinds used in the current compilation. The /// resulting action is returned. Action *processHostLinkAction(Action *HostAction) { // Add all the dependences from the device linking actions. OffloadAction::DeviceDependences DDeps; for (auto *SB : SpecializedBuilders) { if (!SB->isValid()) continue; SB->appendLinkDependences(DDeps); } // Calculate all the offload kinds used in the current compilation. unsigned ActiveOffloadKinds = 0u; for (auto &I : InputArgToOffloadKindMap) ActiveOffloadKinds |= I.second; // If we don't have device dependencies, we don't have to create an offload // action. if (DDeps.getActions().empty()) { // Set all the active offloading kinds to the link action. Given that it // is a link action it is assumed to depend on all actions generated so // far. HostAction->setHostOffloadInfo(ActiveOffloadKinds, /*BoundArch=*/nullptr); // Propagate active offloading kinds for each input to the link action. // Each input may have different active offloading kind. for (auto *A : HostAction->inputs()) { auto ArgLoc = HostActionToInputArgMap.find(A); if (ArgLoc == HostActionToInputArgMap.end()) continue; auto OFKLoc = InputArgToOffloadKindMap.find(ArgLoc->second); if (OFKLoc == InputArgToOffloadKindMap.end()) continue; A->propagateHostOffloadInfo(OFKLoc->second, /*BoundArch=*/nullptr); } return HostAction; } // Create the offload action with all dependences. When an offload action // is created the kinds are propagated to the host action, so we don't have // to do that explicitly here. OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), /*BoundArch*/ nullptr, ActiveOffloadKinds); return C.MakeAction(HDep, DDeps); } }; } // anonymous namespace. void Driver::handleArguments(Compilation &C, DerivedArgList &Args, const InputList &Inputs, ActionList &Actions) const { // Ignore /Yc/Yu if both /Yc and /Yu passed but with different filenames. 
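  // Illustrative example (editorial, not from upstream): `clang-cl /Ycpch.h
  // /Yuother.h a.cpp` triggers the warning below and both PCH options are
  // dropped for the rest of the compilation.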
Arg *YcArg = Args.getLastArg(options::OPT__SLASH_Yc); Arg *YuArg = Args.getLastArg(options::OPT__SLASH_Yu); if (YcArg && YuArg && strcmp(YcArg->getValue(), YuArg->getValue()) != 0) { Diag(clang::diag::warn_drv_ycyu_different_arg_clang_cl); Args.eraseArg(options::OPT__SLASH_Yc); Args.eraseArg(options::OPT__SLASH_Yu); YcArg = YuArg = nullptr; } if (YcArg && Inputs.size() > 1) { Diag(clang::diag::warn_drv_yc_multiple_inputs_clang_cl); Args.eraseArg(options::OPT__SLASH_Yc); YcArg = nullptr; } Arg *FinalPhaseArg; phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg); if (FinalPhase == phases::Link) { if (Args.hasArgNoClaim(options::OPT_hipstdpar)) { Args.AddFlagArg(nullptr, getOpts().getOption(options::OPT_hip_link)); Args.AddFlagArg(nullptr, getOpts().getOption(options::OPT_frtlib_add_rpath)); } // Emitting LLVM while linking disabled except in HIPAMD Toolchain if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link)) Diag(clang::diag::err_drv_emit_llvm_link); if (IsCLMode() && LTOMode != LTOK_None && !Args.getLastArgValue(options::OPT_fuse_ld_EQ) .equals_insensitive("lld")) Diag(clang::diag::err_drv_lto_without_lld); // If -dumpdir is not specified, give a default prefix derived from the link // output filename. For example, `clang -g -gsplit-dwarf a.c -o x` passes // `-dumpdir x-` to cc1. If -o is unspecified, use // stem(getDefaultImageName()) (usually stem("a.out") = "a"). if (!Args.hasArg(options::OPT_dumpdir)) { Arg *FinalOutput = Args.getLastArg(options::OPT_o, options::OPT__SLASH_o); Arg *Arg = Args.MakeSeparateArg( nullptr, getOpts().getOption(options::OPT_dumpdir), Args.MakeArgString( (FinalOutput ? FinalOutput->getValue() : llvm::sys::path::stem(getDefaultImageName())) + "-")); Arg->claim(); Args.append(Arg); } } if (FinalPhase == phases::Preprocess || Args.hasArg(options::OPT__SLASH_Y_)) { // If only preprocessing or /Y- is used, all pch handling is disabled. // Rather than check for it everywhere, just remove clang-cl pch-related // flags here. Args.eraseArg(options::OPT__SLASH_Fp); Args.eraseArg(options::OPT__SLASH_Yc); Args.eraseArg(options::OPT__SLASH_Yu); YcArg = YuArg = nullptr; } unsigned LastPLSize = 0; for (auto &I : Inputs) { types::ID InputType = I.first; const Arg *InputArg = I.second; auto PL = types::getCompilationPhases(InputType); LastPLSize = PL.size(); // If the first step comes after the final phase we are doing as part of // this compilation, warn the user about it. phases::ID InitialPhase = PL[0]; if (InitialPhase > FinalPhase) { if (InputArg->isClaimed()) continue; // Claim here to avoid the more general unused warning. InputArg->claim(); // Suppress all unused style warnings with -Qunused-arguments if (Args.hasArg(options::OPT_Qunused_arguments)) continue; // Special case when final phase determined by binary name, rather than // by a command-line argument with a corresponding Arg. if (CCCIsCPP()) Diag(clang::diag::warn_drv_input_file_unused_by_cpp) << InputArg->getAsString(Args) << getPhaseName(InitialPhase); // Special case '-E' warning on a previously preprocessed file to make // more sense. else if (InitialPhase == phases::Compile && (Args.getLastArg(options::OPT__SLASH_EP, options::OPT__SLASH_P) || Args.getLastArg(options::OPT_E) || Args.getLastArg(options::OPT_M, options::OPT_MM)) && getPreprocessedType(InputType) == types::TY_INVALID) Diag(clang::diag::warn_drv_preprocessed_input_file_unused) << InputArg->getAsString(Args) << !!FinalPhaseArg << (FinalPhaseArg ? 
FinalPhaseArg->getOption().getName() : ""); else Diag(clang::diag::warn_drv_input_file_unused) << InputArg->getAsString(Args) << getPhaseName(InitialPhase) << !!FinalPhaseArg << (FinalPhaseArg ? FinalPhaseArg->getOption().getName() : ""); continue; } if (YcArg) { // Add a separate precompile phase for the compile phase. if (FinalPhase >= phases::Compile) { const types::ID HeaderType = lookupHeaderTypeForSourceType(InputType); // Build the pipeline for the pch file. Action *ClangClPch = C.MakeAction(*InputArg, HeaderType); for (phases::ID Phase : types::getCompilationPhases(HeaderType)) ClangClPch = ConstructPhaseAction(C, Args, Phase, ClangClPch); assert(ClangClPch); Actions.push_back(ClangClPch); // The driver currently exits after the first failed command. This // relies on that behavior, to make sure if the pch generation fails, // the main compilation won't run. // FIXME: If the main compilation fails, the PCH generation should // probably not be considered successful either. } } } // If we are linking, claim any options which are obviously only used for // compilation. // FIXME: Understand why the last Phase List length is used here. if (FinalPhase == phases::Link && LastPLSize == 1) { Args.ClaimAllArgs(options::OPT_CompileOnly_Group); Args.ClaimAllArgs(options::OPT_cl_compile_Group); } } void Driver::BuildActions(Compilation &C, DerivedArgList &Args, const InputList &Inputs, ActionList &Actions) const { llvm::PrettyStackTraceString CrashInfo("Building compilation actions"); if (!SuppressMissingInputWarning && Inputs.empty()) { Diag(clang::diag::err_drv_no_input_files); return; } // Diagnose misuse of /Fo. if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fo)) { StringRef V = A->getValue(); if (Inputs.size() > 1 && !V.empty() && !llvm::sys::path::is_separator(V.back())) { // Check whether /Fo tries to name an output file for multiple inputs. Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources) << A->getSpelling() << V; Args.eraseArg(options::OPT__SLASH_Fo); } } // Diagnose misuse of /Fa. if (Arg *A = Args.getLastArg(options::OPT__SLASH_Fa)) { StringRef V = A->getValue(); if (Inputs.size() > 1 && !V.empty() && !llvm::sys::path::is_separator(V.back())) { // Check whether /Fa tries to name an asm file for multiple inputs. Diag(clang::diag::err_drv_out_file_argument_with_multiple_sources) << A->getSpelling() << V; Args.eraseArg(options::OPT__SLASH_Fa); } } // Diagnose misuse of /o. if (Arg *A = Args.getLastArg(options::OPT__SLASH_o)) { if (A->getValue()[0] == '\0') { // It has to have a value. Diag(clang::diag::err_drv_missing_argument) << A->getSpelling() << 1; Args.eraseArg(options::OPT__SLASH_o); } } handleArguments(C, Args, Inputs, Actions); bool UseNewOffloadingDriver = C.isOffloadingHostKind(Action::OFK_OpenMP) || Args.hasFlag(options::OPT_offload_new_driver, options::OPT_no_offload_new_driver, false); // Builder to be used to build offloading actions. std::unique_ptr OffloadBuilder = !UseNewOffloadingDriver ? std::make_unique(C, Args, Inputs) : nullptr; // Construct the actions to perform. ExtractAPIJobAction *ExtractAPIAction = nullptr; ActionList LinkerInputs; ActionList MergerInputs; for (auto &I : Inputs) { types::ID InputType = I.first; const Arg *InputArg = I.second; auto PL = types::getCompilationPhases(*this, Args, InputType); if (PL.empty()) continue; auto FullPL = types::getCompilationPhases(InputType); // Build the pipeline for this file. 
Action *Current = C.MakeAction(*InputArg, InputType); // Use the current host action in any of the offloading actions, if // required. if (!UseNewOffloadingDriver) if (OffloadBuilder->addHostDependenceToDeviceActions(Current, InputArg)) break; for (phases::ID Phase : PL) { // Add any offload action the host action depends on. if (!UseNewOffloadingDriver) Current = OffloadBuilder->addDeviceDependencesToHostAction( Current, InputArg, Phase, PL.back(), FullPL); if (!Current) break; // Queue linker inputs. if (Phase == phases::Link) { assert(Phase == PL.back() && "linking must be final compilation step."); // We don't need to generate additional link commands if emitting AMD // bitcode or compiling only for the offload device if (!(C.getInputArgs().hasArg(options::OPT_hip_link) && (C.getInputArgs().hasArg(options::OPT_emit_llvm))) && !offloadDeviceOnly()) LinkerInputs.push_back(Current); Current = nullptr; break; } // TODO: Consider removing this because the merged may not end up being // the final Phase in the pipeline. Perhaps the merged could just merge // and then pass an artifact of some sort to the Link Phase. // Queue merger inputs. if (Phase == phases::IfsMerge) { assert(Phase == PL.back() && "merging must be final compilation step."); MergerInputs.push_back(Current); Current = nullptr; break; } if (Phase == phases::Precompile && ExtractAPIAction) { ExtractAPIAction->addHeaderInput(Current); Current = nullptr; break; } // FIXME: Should we include any prior module file outputs as inputs of // later actions in the same command line? // Otherwise construct the appropriate action. Action *NewCurrent = ConstructPhaseAction(C, Args, Phase, Current); // We didn't create a new action, so we will just move to the next phase. if (NewCurrent == Current) continue; if (auto *EAA = dyn_cast(NewCurrent)) ExtractAPIAction = EAA; Current = NewCurrent; // Try to build the offloading actions and add the result as a dependency // to the host. if (UseNewOffloadingDriver) Current = BuildOffloadingActions(C, Args, I, Current); // Use the current host action in any of the offloading actions, if // required. else if (OffloadBuilder->addHostDependenceToDeviceActions(Current, InputArg)) break; if (Current->getType() == types::TY_Nothing) break; } // If we ended with something, add to the output list. if (Current) Actions.push_back(Current); // Add any top level actions generated for offloading. if (!UseNewOffloadingDriver) OffloadBuilder->appendTopLevelActions(Actions, Current, InputArg); else if (Current) Current->propagateHostOffloadInfo(C.getActiveOffloadKinds(), /*BoundArch=*/nullptr); } // Add a link action if necessary. if (LinkerInputs.empty()) { Arg *FinalPhaseArg; if (getFinalPhase(Args, &FinalPhaseArg) == phases::Link) if (!UseNewOffloadingDriver) OffloadBuilder->appendDeviceLinkActions(Actions); } if (!LinkerInputs.empty()) { if (!UseNewOffloadingDriver) if (Action *Wrapper = OffloadBuilder->makeHostLinkAction()) LinkerInputs.push_back(Wrapper); Action *LA; // Check if this Linker Job should emit a static library. 
if (ShouldEmitStaticLibrary(Args)) { LA = C.MakeAction(LinkerInputs, types::TY_Image); } else if (UseNewOffloadingDriver || Args.hasArg(options::OPT_offload_link)) { LA = C.MakeAction(LinkerInputs, types::TY_Image); LA->propagateHostOffloadInfo(C.getActiveOffloadKinds(), /*BoundArch=*/nullptr); } else { LA = C.MakeAction(LinkerInputs, types::TY_Image); } if (!UseNewOffloadingDriver) LA = OffloadBuilder->processHostLinkAction(LA); Actions.push_back(LA); } // Add an interface stubs merge action if necessary. if (!MergerInputs.empty()) Actions.push_back( C.MakeAction(MergerInputs, types::TY_Image)); if (Args.hasArg(options::OPT_emit_interface_stubs)) { auto PhaseList = types::getCompilationPhases( types::TY_IFS_CPP, Args.hasArg(options::OPT_c) ? phases::Compile : phases::IfsMerge); ActionList MergerInputs; for (auto &I : Inputs) { types::ID InputType = I.first; const Arg *InputArg = I.second; // Currently clang and the llvm assembler do not support generating symbol // stubs from assembly, so we skip the input on asm files. For ifs files // we rely on the normal pipeline setup in the pipeline setup code above. if (InputType == types::TY_IFS || InputType == types::TY_PP_Asm || InputType == types::TY_Asm) continue; Action *Current = C.MakeAction(*InputArg, InputType); for (auto Phase : PhaseList) { switch (Phase) { default: llvm_unreachable( "IFS Pipeline can only consist of Compile followed by IfsMerge."); case phases::Compile: { // Only IfsMerge (llvm-ifs) can handle .o files by looking for ifs // files where the .o file is located. The compile action can not // handle this. if (InputType == types::TY_Object) break; Current = C.MakeAction(Current, types::TY_IFS_CPP); break; } case phases::IfsMerge: { assert(Phase == PhaseList.back() && "merging must be final compilation step."); MergerInputs.push_back(Current); Current = nullptr; break; } } } // If we ended with something, add to the output list. if (Current) Actions.push_back(Current); } // Add an interface stubs merge action if necessary. if (!MergerInputs.empty()) Actions.push_back( C.MakeAction(MergerInputs, types::TY_Image)); } for (auto Opt : {options::OPT_print_supported_cpus, options::OPT_print_supported_extensions, options::OPT_print_enabled_extensions}) { // If --print-supported-cpus, -mcpu=? or -mtune=? is specified, build a // custom Compile phase that prints out supported cpu models and quits. // // If either --print-supported-extensions or --print-enabled-extensions is // specified, call the corresponding helper function that prints out the // supported/enabled extensions and quits. if (Arg *A = Args.getLastArg(Opt)) { if (Opt == options::OPT_print_supported_extensions && !C.getDefaultToolChain().getTriple().isRISCV() && !C.getDefaultToolChain().getTriple().isAArch64() && !C.getDefaultToolChain().getTriple().isARM()) { C.getDriver().Diag(diag::err_opt_not_valid_on_target) << "--print-supported-extensions"; return; } if (Opt == options::OPT_print_enabled_extensions && !C.getDefaultToolChain().getTriple().isRISCV() && !C.getDefaultToolChain().getTriple().isAArch64()) { C.getDriver().Diag(diag::err_opt_not_valid_on_target) << "--print-enabled-extensions"; return; } // Use the -mcpu=? flag as the dummy input to cc1. Actions.clear(); Action *InputAc = C.MakeAction(*A, types::TY_C); Actions.push_back( C.MakeAction(InputAc, types::TY_Nothing)); for (auto &I : Inputs) I.second->claim(); } } // Call validator for dxil when -Vd not in Args. if (C.getDefaultToolChain().getTriple().isDXIL()) { // Only add action when needValidation. 
const auto &TC = static_cast(C.getDefaultToolChain()); if (TC.requiresValidation(Args)) { Action *LastAction = Actions.back(); Actions.push_back(C.MakeAction( LastAction, types::TY_DX_CONTAINER)); } } // Claim ignored clang-cl options. Args.ClaimAllArgs(options::OPT_cl_ignored_Group); } /// Returns the canonical name for the offloading architecture when using a HIP /// or CUDA architecture. static StringRef getCanonicalArchString(Compilation &C, const llvm::opt::DerivedArgList &Args, StringRef ArchStr, const llvm::Triple &Triple, bool SuppressError = false) { // Lookup the CUDA / HIP architecture string. Only report an error if we were // expecting the triple to be only NVPTX / AMDGPU. OffloadArch Arch = StringToOffloadArch(getProcessorFromTargetID(Triple, ArchStr)); if (!SuppressError && Triple.isNVPTX() && (Arch == OffloadArch::UNKNOWN || !IsNVIDIAOffloadArch(Arch))) { C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) << "CUDA" << ArchStr; return StringRef(); } else if (!SuppressError && Triple.isAMDGPU() && (Arch == OffloadArch::UNKNOWN || !IsAMDOffloadArch(Arch))) { C.getDriver().Diag(clang::diag::err_drv_offload_bad_gpu_arch) << "HIP" << ArchStr; return StringRef(); } if (IsNVIDIAOffloadArch(Arch)) return Args.MakeArgStringRef(OffloadArchToString(Arch)); if (IsAMDOffloadArch(Arch)) { llvm::StringMap Features; auto HIPTriple = getHIPOffloadTargetTriple(C.getDriver(), C.getInputArgs()); if (!HIPTriple) return StringRef(); auto Arch = parseTargetID(*HIPTriple, ArchStr, &Features); if (!Arch) { C.getDriver().Diag(clang::diag::err_drv_bad_target_id) << ArchStr; C.setContainsError(); return StringRef(); } return Args.MakeArgStringRef(getCanonicalTargetID(*Arch, Features)); } // If the input isn't CUDA or HIP just return the architecture. return ArchStr; } /// Checks if the set offloading architectures does not conflict. Returns the /// incompatible pair if a conflict occurs. static std::optional> getConflictOffloadArchCombination(const llvm::DenseSet &Archs, llvm::Triple Triple) { if (!Triple.isAMDGPU()) return std::nullopt; std::set ArchSet; llvm::copy(Archs, std::inserter(ArchSet, ArchSet.begin())); return getConflictTargetIDCombination(ArchSet); } llvm::DenseSet Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Action::OffloadKind Kind, const ToolChain *TC, bool SuppressError) const { if (!TC) TC = &C.getDefaultToolChain(); // --offload and --offload-arch options are mutually exclusive. if (Args.hasArgNoClaim(options::OPT_offload_EQ) && Args.hasArgNoClaim(options::OPT_offload_arch_EQ, options::OPT_no_offload_arch_EQ)) { C.getDriver().Diag(diag::err_opt_not_valid_with_opt) << "--offload" << (Args.hasArgNoClaim(options::OPT_offload_arch_EQ) ? "--offload-arch" : "--no-offload-arch"); } if (KnownArchs.contains(TC)) return KnownArchs.lookup(TC); llvm::DenseSet Archs; for (auto *Arg : Args) { // Extract any '--[no-]offload-arch' arguments intended for this toolchain. std::unique_ptr ExtractedArg = nullptr; if (Arg->getOption().matches(options::OPT_Xopenmp_target_EQ) && ToolChain::getOpenMPTriple(Arg->getValue(0)) == TC->getTriple()) { Arg->claim(); unsigned Index = Args.getBaseArgs().MakeIndex(Arg->getValue(1)); ExtractedArg = getOpts().ParseOneArg(Args, Index); Arg = ExtractedArg.get(); } // Add or remove the seen architectures in order of appearance. If an // invalid architecture is given we simply exit. 
if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { for (StringRef Arch : llvm::split(Arg->getValue(), ",")) { if (Arch == "native" || Arch.empty()) { auto GPUsOrErr = TC->getSystemGPUArchs(Args); if (!GPUsOrErr) { if (SuppressError) llvm::consumeError(GPUsOrErr.takeError()); else TC->getDriver().Diag(diag::err_drv_undetermined_gpu_arch) << llvm::Triple::getArchTypeName(TC->getArch()) << llvm::toString(GPUsOrErr.takeError()) << "--offload-arch"; continue; } for (auto ArchStr : *GPUsOrErr) { Archs.insert( getCanonicalArchString(C, Args, Args.MakeArgString(ArchStr), TC->getTriple(), SuppressError)); } } else { StringRef ArchStr = getCanonicalArchString( C, Args, Arch, TC->getTriple(), SuppressError); if (ArchStr.empty()) return Archs; Archs.insert(ArchStr); } } } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) { for (StringRef Arch : llvm::split(Arg->getValue(), ",")) { if (Arch == "all") { Archs.clear(); } else { StringRef ArchStr = getCanonicalArchString( C, Args, Arch, TC->getTriple(), SuppressError); if (ArchStr.empty()) return Archs; Archs.erase(ArchStr); } } } } if (auto ConflictingArchs = getConflictOffloadArchCombination(Archs, TC->getTriple())) { C.getDriver().Diag(clang::diag::err_drv_bad_offload_arch_combo) << ConflictingArchs->first << ConflictingArchs->second; C.setContainsError(); } // Skip filling defaults if we're just querying what is availible. if (SuppressError) return Archs; if (Archs.empty()) { if (Kind == Action::OFK_Cuda) Archs.insert(OffloadArchToString(OffloadArch::CudaDefault)); else if (Kind == Action::OFK_HIP) Archs.insert(OffloadArchToString(OffloadArch::HIPDefault)); else if (Kind == Action::OFK_OpenMP) Archs.insert(StringRef()); } else { Args.ClaimAllArgs(options::OPT_offload_arch_EQ); Args.ClaimAllArgs(options::OPT_no_offload_arch_EQ); } return Archs; } Action *Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, const InputTy &Input, Action *HostAction) const { // Don't build offloading actions if explicitly disabled or we do not have a // valid source input and compile action to embed it in. If preprocessing only // ignore embedding. if (offloadHostOnly() || !types::isSrcFile(Input.first) || !(isa(HostAction) || getFinalPhase(Args) == phases::Preprocess)) return HostAction; ActionList OffloadActions; OffloadAction::DeviceDependences DDeps; const Action::OffloadKind OffloadKinds[] = { Action::OFK_OpenMP, Action::OFK_Cuda, Action::OFK_HIP}; for (Action::OffloadKind Kind : OffloadKinds) { SmallVector ToolChains; ActionList DeviceActions; auto TCRange = C.getOffloadToolChains(Kind); for (auto TI = TCRange.first, TE = TCRange.second; TI != TE; ++TI) ToolChains.push_back(TI->second); if (ToolChains.empty()) continue; types::ID InputType = Input.first; const Arg *InputArg = Input.second; // The toolchain can be active for unsupported file types. if ((Kind == Action::OFK_Cuda && !types::isCuda(InputType)) || (Kind == Action::OFK_HIP && !types::isHIP(InputType))) continue; // Get the product of all bound architectures and toolchains. 
SmallVector> TCAndArchs; for (const ToolChain *TC : ToolChains) { llvm::DenseSet Arches = getOffloadArchs(C, Args, Kind, TC); SmallVector Sorted(Arches.begin(), Arches.end()); llvm::sort(Sorted); for (StringRef Arch : Sorted) TCAndArchs.push_back(std::make_pair(TC, Arch)); } for (unsigned I = 0, E = TCAndArchs.size(); I != E; ++I) DeviceActions.push_back(C.MakeAction(*InputArg, InputType)); if (DeviceActions.empty()) return HostAction; auto PL = types::getCompilationPhases(*this, Args, InputType); for (phases::ID Phase : PL) { if (Phase == phases::Link) { assert(Phase == PL.back() && "linking must be final compilation step."); break; } auto TCAndArch = TCAndArchs.begin(); for (Action *&A : DeviceActions) { if (A->getType() == types::TY_Nothing) continue; // Propagate the ToolChain so we can use it in ConstructPhaseAction. A->propagateDeviceOffloadInfo(Kind, TCAndArch->second.data(), TCAndArch->first); A = ConstructPhaseAction(C, Args, Phase, A, Kind); if (isa(A) && isa(HostAction) && Kind == Action::OFK_OpenMP && HostAction->getType() != types::TY_Nothing) { // OpenMP offloading has a dependency on the host compile action to // identify which declarations need to be emitted. This shouldn't be // collapsed with any other actions so we can use it in the device. HostAction->setCannotBeCollapsedWithNextDependentAction(); OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), TCAndArch->second.data(), Kind); OffloadAction::DeviceDependences DDep; DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); A = C.MakeAction(HDep, DDep); } ++TCAndArch; } } // Compiling HIP in non-RDC mode requires linking each action individually. for (Action *&A : DeviceActions) { if ((A->getType() != types::TY_Object && A->getType() != types::TY_LTO_BC) || Kind != Action::OFK_HIP || Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) continue; ActionList LinkerInput = {A}; A = C.MakeAction(LinkerInput, types::TY_Image); } auto TCAndArch = TCAndArchs.begin(); for (Action *A : DeviceActions) { DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); OffloadAction::DeviceDependences DDep; DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); // Compiling CUDA in non-RDC mode uses the PTX output if available. for (Action *Input : A->getInputs()) if (Kind == Action::OFK_Cuda && A->getType() == types::TY_Object && !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) DDep.add(*Input, *TCAndArch->first, TCAndArch->second.data(), Kind); OffloadActions.push_back(C.MakeAction(DDep, A->getType())); ++TCAndArch; } } // HIP code in non-RDC mode will bundle the output if it invoked the linker. bool ShouldBundleHIP = C.isOffloadingHostKind(Action::OFK_HIP) && Args.hasFlag(options::OPT_gpu_bundle_output, options::OPT_no_gpu_bundle_output, true) && !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) && !llvm::any_of(OffloadActions, [](Action *A) { return A->getType() != types::TY_Image; }); // All kinds exit now in device-only mode except for non-RDC mode HIP. if (offloadDeviceOnly() && !ShouldBundleHIP) return C.MakeAction(DDeps, types::TY_Nothing); if (OffloadActions.empty()) return HostAction; OffloadAction::DeviceDependences DDep; if (C.isOffloadingHostKind(Action::OFK_Cuda) && !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) { // If we are not in RDC-mode we just emit the final CUDA fatbinary for // each translation unit without requiring any linking. 
Action *FatbinAction = C.MakeAction(OffloadActions, types::TY_CUDA_FATBIN); DDep.add(*FatbinAction, *C.getSingleOffloadToolChain(), nullptr, Action::OFK_Cuda); } else if (C.isOffloadingHostKind(Action::OFK_HIP) && !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) { // If we are not in RDC-mode we just emit the final HIP fatbinary for each // translation unit, linking each input individually. Action *FatbinAction = C.MakeAction(OffloadActions, types::TY_HIP_FATBIN); DDep.add(*FatbinAction, *C.getSingleOffloadToolChain(), nullptr, Action::OFK_HIP); } else { // Package all the offloading actions into a single output that can be // embedded in the host and linked. Action *PackagerAction = C.MakeAction(OffloadActions, types::TY_Image); DDep.add(*PackagerAction, *C.getSingleOffloadToolChain(), nullptr, C.getActiveOffloadKinds()); } // HIP wants '--offload-device-only' to create a fatbinary by default. if (offloadDeviceOnly()) return C.MakeAction(DDep, types::TY_Nothing); // If we are unable to embed a single device output into the host, we need to // add each device output as a host dependency to ensure they are still built. bool SingleDeviceOutput = !llvm::any_of(OffloadActions, [](Action *A) { return A->getType() == types::TY_Nothing; }) && isa(HostAction); OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), /*BoundArch=*/nullptr, SingleDeviceOutput ? DDep : DDeps); return C.MakeAction(HDep, SingleDeviceOutput ? DDep : DDeps); } Action *Driver::ConstructPhaseAction( Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input, Action::OffloadKind TargetDeviceOffloadKind) const { llvm::PrettyStackTraceString CrashInfo("Constructing phase actions"); // Some types skip the assembler phase (e.g., llvm-bc), but we can't // encode this in the steps because the intermediate type depends on // arguments. Just special case here. if (Phase == phases::Assemble && Input->getType() != types::TY_PP_Asm) return Input; // Build the appropriate action. switch (Phase) { case phases::Link: llvm_unreachable("link action invalid here."); case phases::IfsMerge: llvm_unreachable("ifsmerge action invalid here."); case phases::Preprocess: { types::ID OutputTy; // -M and -MM specify the dependency file name by altering the output type, // -if -MD and -MMD are not specified. if (Args.hasArg(options::OPT_M, options::OPT_MM) && !Args.hasArg(options::OPT_MD, options::OPT_MMD)) { OutputTy = types::TY_Dependencies; } else { OutputTy = Input->getType(); // For these cases, the preprocessor is only translating forms, the Output // still needs preprocessing. if (!Args.hasFlag(options::OPT_frewrite_includes, options::OPT_fno_rewrite_includes, false) && !Args.hasFlag(options::OPT_frewrite_imports, options::OPT_fno_rewrite_imports, false) && !Args.hasFlag(options::OPT_fdirectives_only, options::OPT_fno_directives_only, false) && !CCGenDiagnostics) OutputTy = types::getPreprocessedType(OutputTy); assert(OutputTy != types::TY_INVALID && "Cannot preprocess this input type!"); } return C.MakeAction(Input, OutputTy); } case phases::Precompile: { // API extraction should not generate an actual precompilation action. if (Args.hasArg(options::OPT_extract_api)) return C.MakeAction(Input, types::TY_API_INFO); // With 'fexperimental-modules-reduced-bmi', we don't want to run the // precompile phase unless the user specified '--precompile'. In the case // the '--precompile' flag is enabled, we will try to emit the reduced BMI // as a by product in GenerateModuleInterfaceAction. 
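// ---------------------------------------------------------------------------
// Illustrative sketch only, not part of the driver: a compact, hedged
// restatement of how the precompile case below chooses its output for an
// ordinary header input whose precompiled type would be a PCH. All names here
// (ToyPrecompileOut, toyPrecompileOutput) are invented for illustration.
#include <optional>

enum class ToyPrecompileOut { PCH, ModuleFile, APIInfo, Nothing };

// std::nullopt means "do not create a precompile action at all", i.e. the
// reduced-BMI case where the BMI is emitted as a by-product of the compile.
static std::optional<ToyPrecompileOut>
toyPrecompileOutput(bool ExtractAPI, bool ReducedBMI, bool Precompile,
                    bool HasModuleName, bool SyntaxOnly) {
  if (ExtractAPI)
    return ToyPrecompileOut::APIInfo;   // -extract-api short-circuits
  if (ReducedBMI && !Precompile)
    return std::nullopt;                // skip the precompile phase entirely
  if (SyntaxOnly)
    return ToyPrecompileOut::Nothing;   // syntax checks emit no PCH
  return HasModuleName ? ToyPrecompileOut::ModuleFile : ToyPrecompileOut::PCH;
}
// ---------------------------------------------------------------------------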
if (Args.hasArg(options::OPT_modules_reduced_bmi) && !Args.getLastArg(options::OPT__precompile)) return Input; types::ID OutputTy = getPrecompiledType(Input->getType()); assert(OutputTy != types::TY_INVALID && "Cannot precompile this input type!"); // If we're given a module name, precompile header file inputs as a // module, not as a precompiled header. const char *ModName = nullptr; if (OutputTy == types::TY_PCH) { if (Arg *A = Args.getLastArg(options::OPT_fmodule_name_EQ)) ModName = A->getValue(); if (ModName) OutputTy = types::TY_ModuleFile; } if (Args.hasArg(options::OPT_fsyntax_only)) { // Syntax checks should not emit a PCH file OutputTy = types::TY_Nothing; } return C.MakeAction(Input, OutputTy); } case phases::Compile: { if (Args.hasArg(options::OPT_fsyntax_only)) return C.MakeAction(Input, types::TY_Nothing); if (Args.hasArg(options::OPT_rewrite_objc)) return C.MakeAction(Input, types::TY_RewrittenObjC); if (Args.hasArg(options::OPT_rewrite_legacy_objc)) return C.MakeAction(Input, types::TY_RewrittenLegacyObjC); if (Args.hasArg(options::OPT__analyze)) return C.MakeAction(Input, types::TY_Plist); if (Args.hasArg(options::OPT__migrate)) return C.MakeAction(Input, types::TY_Remap); if (Args.hasArg(options::OPT_emit_ast)) return C.MakeAction(Input, types::TY_AST); if (Args.hasArg(options::OPT_emit_cir)) return C.MakeAction(Input, types::TY_CIR); if (Args.hasArg(options::OPT_module_file_info)) return C.MakeAction(Input, types::TY_ModuleFile); if (Args.hasArg(options::OPT_verify_pch)) return C.MakeAction(Input, types::TY_Nothing); if (Args.hasArg(options::OPT_extract_api)) return C.MakeAction(Input, types::TY_API_INFO); return C.MakeAction(Input, types::TY_LLVM_BC); } case phases::Backend: { if (isUsingLTO() && TargetDeviceOffloadKind == Action::OFK_None) { types::ID Output; if (Args.hasArg(options::OPT_ffat_lto_objects) && !Args.hasArg(options::OPT_emit_llvm)) Output = types::TY_PP_Asm; else if (Args.hasArg(options::OPT_S)) Output = types::TY_LTO_IR; else Output = types::TY_LTO_BC; return C.MakeAction(Input, Output); } if (isUsingLTO(/* IsOffload */ true) && TargetDeviceOffloadKind != Action::OFK_None) { types::ID Output = Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; return C.MakeAction(Input, Output); } if (Args.hasArg(options::OPT_emit_llvm) || (((Input->getOffloadingToolChain() && Input->getOffloadingToolChain()->getTriple().isAMDGPU()) || TargetDeviceOffloadKind == Action::OFK_HIP) && (Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false) || TargetDeviceOffloadKind == Action::OFK_OpenMP))) { types::ID Output = Args.hasArg(options::OPT_S) && (TargetDeviceOffloadKind == Action::OFK_None || offloadDeviceOnly() || (TargetDeviceOffloadKind == Action::OFK_HIP && !Args.hasFlag(options::OPT_offload_new_driver, options::OPT_no_offload_new_driver, false))) ? types::TY_LLVM_IR : types::TY_LLVM_BC; return C.MakeAction(Input, Output); } return C.MakeAction(Input, types::TY_PP_Asm); } case phases::Assemble: return C.MakeAction(std::move(Input), types::TY_Object); } llvm_unreachable("invalid phase in ConstructPhaseAction"); } void Driver::BuildJobs(Compilation &C) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o); // It is an error to provide a -o option if we are making multiple output // files. 
There are exceptions: // // IfsMergeJob: when generating interface stubs enabled we want to be able to // generate the stub file at the same time that we generate the real // library/a.out. So when a .o, .so, etc are the output, with clang interface // stubs there will also be a .ifs and .ifso at the same location. // // CompileJob of type TY_IFS_CPP: when generating interface stubs is enabled // and -c is passed, we still want to be able to generate a .ifs file while // we are also generating .o files. So we allow more than one output file in // this case as well. // // OffloadClass of type TY_Nothing: device-only output will place many outputs // into a single offloading action. We should count all inputs to the action // as outputs. Also ignore device-only outputs if we're compiling with // -fsyntax-only. if (FinalOutput) { unsigned NumOutputs = 0; unsigned NumIfsOutputs = 0; for (const Action *A : C.getActions()) { if (A->getType() != types::TY_Nothing && A->getType() != types::TY_DX_CONTAINER && !(A->getKind() == Action::IfsMergeJobClass || (A->getType() == clang::driver::types::TY_IFS_CPP && A->getKind() == clang::driver::Action::CompileJobClass && 0 == NumIfsOutputs++) || (A->getKind() == Action::BindArchClass && A->getInputs().size() && A->getInputs().front()->getKind() == Action::IfsMergeJobClass))) ++NumOutputs; else if (A->getKind() == Action::OffloadClass && A->getType() == types::TY_Nothing && !C.getArgs().hasArg(options::OPT_fsyntax_only)) NumOutputs += A->size(); } if (NumOutputs > 1) { Diag(clang::diag::err_drv_output_argument_with_multiple_files); FinalOutput = nullptr; } } const llvm::Triple &RawTriple = C.getDefaultToolChain().getTriple(); // Collect the list of architectures. llvm::StringSet<> ArchNames; if (RawTriple.isOSBinFormatMachO()) for (const Arg *A : C.getArgs()) if (A->getOption().matches(options::OPT_arch)) ArchNames.insert(A->getValue()); // Set of (Action, canonical ToolChain triple) pairs we've built jobs for. std::map, InputInfoList> CachedResults; for (Action *A : C.getActions()) { // If we are linking an image for multiple archs then the linker wants // -arch_multiple and -final_output . Unfortunately, this // doesn't fit in cleanly because we have to pass this information down. // // FIXME: This is a hack; find a cleaner way to integrate this into the // process. const char *LinkingOutput = nullptr; if (isa(A)) { if (FinalOutput) LinkingOutput = FinalOutput->getValue(); else LinkingOutput = getDefaultImageName(); } BuildJobsForAction(C, A, &C.getDefaultToolChain(), /*BoundArch*/ StringRef(), /*AtTopLevel*/ true, /*MultipleArchs*/ ArchNames.size() > 1, /*LinkingOutput*/ LinkingOutput, CachedResults, /*TargetDeviceOffloadKind*/ Action::OFK_None); } // If we have more than one job, then disable integrated-cc1 for now. Do this // also when we need to report process execution statistics. if (C.getJobs().size() > 1 || CCPrintProcessStats) for (auto &J : C.getJobs()) J.InProcess = false; if (CCPrintProcessStats) { C.setPostCallback([=](const Command &Cmd, int Res) { std::optional ProcStat = Cmd.getProcessStatistics(); if (!ProcStat) return; const char *LinkingOutput = nullptr; if (FinalOutput) LinkingOutput = FinalOutput->getValue(); else if (!Cmd.getOutputFilenames().empty()) LinkingOutput = Cmd.getOutputFilenames().front().c_str(); else LinkingOutput = getDefaultImageName(); if (CCPrintStatReportFilename.empty()) { using namespace llvm; // Human readable output. 
outs() << sys::path::filename(Cmd.getExecutable()) << ": " << "output=" << LinkingOutput; outs() << ", total=" << format("%.3f", ProcStat->TotalTime.count() / 1000.) << " ms" << ", user=" << format("%.3f", ProcStat->UserTime.count() / 1000.) << " ms" << ", mem=" << ProcStat->PeakMemory << " Kb\n"; } else { // CSV format. std::string Buffer; llvm::raw_string_ostream Out(Buffer); llvm::sys::printArg(Out, llvm::sys::path::filename(Cmd.getExecutable()), /*Quote*/ true); Out << ','; llvm::sys::printArg(Out, LinkingOutput, true); Out << ',' << ProcStat->TotalTime.count() << ',' << ProcStat->UserTime.count() << ',' << ProcStat->PeakMemory << '\n'; Out.flush(); std::error_code EC; llvm::raw_fd_ostream OS(CCPrintStatReportFilename, EC, llvm::sys::fs::OF_Append | llvm::sys::fs::OF_Text); if (EC) return; auto L = OS.lock(); if (!L) { llvm::errs() << "ERROR: Cannot lock file " << CCPrintStatReportFilename << ": " << toString(L.takeError()) << "\n"; return; } OS << Buffer; OS.flush(); } }); } // If the user passed -Qunused-arguments or there were errors, don't warn // about any unused arguments. if (Diags.hasErrorOccurred() || C.getArgs().hasArg(options::OPT_Qunused_arguments)) return; // Claim -fdriver-only here. (void)C.getArgs().hasArg(options::OPT_fdriver_only); // Claim -### here. (void)C.getArgs().hasArg(options::OPT__HASH_HASH_HASH); // Claim --driver-mode, --rsp-quoting, it was handled earlier. (void)C.getArgs().hasArg(options::OPT_driver_mode); (void)C.getArgs().hasArg(options::OPT_rsp_quoting); bool HasAssembleJob = llvm::any_of(C.getJobs(), [](auto &J) { // Match ClangAs and other derived assemblers of Tool. ClangAs uses a // longer ShortName "clang integrated assembler" while other assemblers just // use "assembler". return strstr(J.getCreator().getShortName(), "assembler"); }); for (Arg *A : C.getArgs()) { // FIXME: It would be nice to be able to send the argument to the // DiagnosticsEngine, so that extra values, position, and so on could be // printed. if (!A->isClaimed()) { if (A->getOption().hasFlag(options::NoArgumentUnused)) continue; // Suppress the warning automatically if this is just a flag, and it is an // instance of an argument we already claimed. const Option &Opt = A->getOption(); if (Opt.getKind() == Option::FlagClass) { bool DuplicateClaimed = false; for (const Arg *AA : C.getArgs().filtered(&Opt)) { if (AA->isClaimed()) { DuplicateClaimed = true; break; } } if (DuplicateClaimed) continue; } // In clang-cl, don't mention unknown arguments here since they have // already been warned about. if (!IsCLMode() || !A->getOption().matches(options::OPT_UNKNOWN)) { if (A->getOption().hasFlag(options::TargetSpecific) && !A->isIgnoredTargetSpecific() && !HasAssembleJob && // When for example -### or -v is used // without a file, target specific options are not // consumed/validated. // Instead emitting an error emit a warning instead. !C.getActions().empty()) { Diag(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << getTargetTriple(); } else { Diag(clang::diag::warn_drv_unused_argument) << A->getAsString(C.getArgs()); } } } } } namespace { /// Utility class to control the collapse of dependent actions and select the /// tools accordingly. class ToolSelector final { /// The tool chain this selector refers to. const ToolChain &TC; /// The compilation this selector refers to. const Compilation &C; /// The base action this selector refers to. const JobAction *BaseAction; /// Set to true if the current toolchain refers to host actions. 
bool IsHostSelector; /// Set to true if save-temps and embed-bitcode functionalities are active. bool SaveTemps; bool EmbedBitcode; /// Get previous dependent action or null if that does not exist. If /// \a CanBeCollapsed is false, that action must be legal to collapse or /// null will be returned. const JobAction *getPrevDependentAction(const ActionList &Inputs, ActionList &SavedOffloadAction, bool CanBeCollapsed = true) { // An option can be collapsed only if it has a single input. if (Inputs.size() != 1) return nullptr; Action *CurAction = *Inputs.begin(); if (CanBeCollapsed && !CurAction->isCollapsingWithNextDependentActionLegal()) return nullptr; // If the input action is an offload action. Look through it and save any // offload action that can be dropped in the event of a collapse. if (auto *OA = dyn_cast(CurAction)) { // If the dependent action is a device action, we will attempt to collapse // only with other device actions. Otherwise, we would do the same but // with host actions only. if (!IsHostSelector) { if (OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)) { CurAction = OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true); if (CanBeCollapsed && !CurAction->isCollapsingWithNextDependentActionLegal()) return nullptr; SavedOffloadAction.push_back(OA); return dyn_cast(CurAction); } } else if (OA->hasHostDependence()) { CurAction = OA->getHostDependence(); if (CanBeCollapsed && !CurAction->isCollapsingWithNextDependentActionLegal()) return nullptr; SavedOffloadAction.push_back(OA); return dyn_cast(CurAction); } return nullptr; } return dyn_cast(CurAction); } /// Return true if an assemble action can be collapsed. bool canCollapseAssembleAction() const { return TC.useIntegratedAs() && !SaveTemps && !C.getArgs().hasArg(options::OPT_via_file_asm) && !C.getArgs().hasArg(options::OPT__SLASH_FA) && !C.getArgs().hasArg(options::OPT__SLASH_Fa) && !C.getArgs().hasArg(options::OPT_dxc_Fc); } /// Return true if a preprocessor action can be collapsed. bool canCollapsePreprocessorAction() const { return !C.getArgs().hasArg(options::OPT_no_integrated_cpp) && !C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps && !C.getArgs().hasArg(options::OPT_rewrite_objc); } /// Struct that relates an action with the offload actions that would be /// collapsed with it. struct JobActionInfo final { /// The action this info refers to. const JobAction *JA = nullptr; /// The offload actions we need to take care off if this action is /// collapsed. ActionList SavedOffloadAction; }; /// Append collapsed offload actions from the give nnumber of elements in the /// action info array. static void AppendCollapsedOffloadAction(ActionList &CollapsedOffloadAction, ArrayRef &ActionInfo, unsigned ElementNum) { assert(ElementNum <= ActionInfo.size() && "Invalid number of elements."); for (unsigned I = 0; I < ElementNum; ++I) CollapsedOffloadAction.append(ActionInfo[I].SavedOffloadAction.begin(), ActionInfo[I].SavedOffloadAction.end()); } /// Functions that attempt to perform the combining. They detect if that is /// legal, and if so they update the inputs \a Inputs and the offload action /// that were collapsed in \a CollapsedOffloadAction. A tool that deals with /// the combined action is returned. If the combining is not legal or if the /// tool does not exist, null is returned. /// Currently three kinds of collapsing are supported: /// - Assemble + Backend + Compile; /// - Assemble + Backend ; /// - Backend + Compile. 
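// ---------------------------------------------------------------------------
// Illustrative sketch only, not part of the driver: a toy model of the three
// collapse patterns listed above, tried in the same order of preference. The
// names (ToyKind, collapsedJobs) are invented; the real combiners also check
// save-temps, -fembed-bitcode, preprocessor options and the selected tool's
// actual capabilities before collapsing.
#include <cassert>
#include <vector>

enum class ToyKind { Compile, Backend, Assemble };

// Chain is ordered from the final job backwards, e.g. {Assemble, Backend,
// Compile}. Returns how many leading elements one tool invocation could cover.
static unsigned collapsedJobs(const std::vector<ToyKind> &Chain,
                              bool IntegratedBackend, bool IntegratedAsm) {
  if (Chain.size() >= 3 && Chain[0] == ToyKind::Assemble &&
      Chain[1] == ToyKind::Backend && Chain[2] == ToyKind::Compile &&
      IntegratedBackend && IntegratedAsm)
    return 3; // Assemble + Backend + Compile
  if (Chain.size() >= 2 && Chain[0] == ToyKind::Assemble &&
      Chain[1] == ToyKind::Backend && IntegratedAsm)
    return 2; // Assemble + Backend
  if (Chain.size() >= 2 && Chain[0] == ToyKind::Backend &&
      Chain[1] == ToyKind::Compile && IntegratedBackend)
    return 2; // Backend + Compile
  return 1;   // no collapsing; the base action's tool runs on its own
}

static void collapsedJobsExample() {
  std::vector<ToyKind> Chain = {ToyKind::Assemble, ToyKind::Backend,
                                ToyKind::Compile};
  // Integrated backend and assembler: the whole chain becomes one job.
  assert(collapsedJobs(Chain, /*IntegratedBackend=*/true,
                       /*IntegratedAsm=*/true) == 3);
  // External assembler: the assemble job runs alone here; Backend + Compile
  // would instead fold when the backend job itself is the base action.
  assert(collapsedJobs(Chain, /*IntegratedBackend=*/true,
                       /*IntegratedAsm=*/false) == 1);
}
// ---------------------------------------------------------------------------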
const Tool * combineAssembleBackendCompile(ArrayRef ActionInfo, ActionList &Inputs, ActionList &CollapsedOffloadAction) { if (ActionInfo.size() < 3 || !canCollapseAssembleAction()) return nullptr; auto *AJ = dyn_cast(ActionInfo[0].JA); auto *BJ = dyn_cast(ActionInfo[1].JA); auto *CJ = dyn_cast(ActionInfo[2].JA); if (!AJ || !BJ || !CJ) return nullptr; // Get compiler tool. const Tool *T = TC.SelectTool(*CJ); if (!T) return nullptr; // Can't collapse if we don't have codegen support unless we are // emitting LLVM IR. bool OutputIsLLVM = types::isLLVMIR(ActionInfo[0].JA->getType()); if (!T->hasIntegratedBackend() && !(OutputIsLLVM && T->canEmitIR())) return nullptr; // When using -fembed-bitcode, it is required to have the same tool (clang) // for both CompilerJA and BackendJA. Otherwise, combine two stages. if (EmbedBitcode) { const Tool *BT = TC.SelectTool(*BJ); if (BT == T) return nullptr; } if (!T->hasIntegratedAssembler()) return nullptr; Inputs = CJ->getInputs(); AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo, /*NumElements=*/3); return T; } const Tool *combineAssembleBackend(ArrayRef ActionInfo, ActionList &Inputs, ActionList &CollapsedOffloadAction) { if (ActionInfo.size() < 2 || !canCollapseAssembleAction()) return nullptr; auto *AJ = dyn_cast(ActionInfo[0].JA); auto *BJ = dyn_cast(ActionInfo[1].JA); if (!AJ || !BJ) return nullptr; // Get backend tool. const Tool *T = TC.SelectTool(*BJ); if (!T) return nullptr; if (!T->hasIntegratedAssembler()) return nullptr; Inputs = BJ->getInputs(); AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo, /*NumElements=*/2); return T; } const Tool *combineBackendCompile(ArrayRef ActionInfo, ActionList &Inputs, ActionList &CollapsedOffloadAction) { if (ActionInfo.size() < 2) return nullptr; auto *BJ = dyn_cast(ActionInfo[0].JA); auto *CJ = dyn_cast(ActionInfo[1].JA); if (!BJ || !CJ) return nullptr; // Check if the initial input (to the compile job or its predessor if one // exists) is LLVM bitcode. In that case, no preprocessor step is required // and we can still collapse the compile and backend jobs when we have // -save-temps. I.e. there is no need for a separate compile job just to // emit unoptimized bitcode. bool InputIsBitcode = true; for (size_t i = 1; i < ActionInfo.size(); i++) if (ActionInfo[i].JA->getType() != types::TY_LLVM_BC && ActionInfo[i].JA->getType() != types::TY_LTO_BC) { InputIsBitcode = false; break; } if (!InputIsBitcode && !canCollapsePreprocessorAction()) return nullptr; // Get compiler tool. const Tool *T = TC.SelectTool(*CJ); if (!T) return nullptr; // Can't collapse if we don't have codegen support unless we are // emitting LLVM IR. bool OutputIsLLVM = types::isLLVMIR(ActionInfo[0].JA->getType()); if (!T->hasIntegratedBackend() && !(OutputIsLLVM && T->canEmitIR())) return nullptr; if (T->canEmitIR() && ((SaveTemps && !InputIsBitcode) || EmbedBitcode)) return nullptr; Inputs = CJ->getInputs(); AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo, /*NumElements=*/2); return T; } /// Updates the inputs if the obtained tool supports combining with /// preprocessor action, and the current input is indeed a preprocessor /// action. If combining results in the collapse of offloading actions, those /// are appended to \a CollapsedOffloadAction. void combineWithPreprocessor(const Tool *T, ActionList &Inputs, ActionList &CollapsedOffloadAction) { if (!T || !canCollapsePreprocessorAction() || !T->hasIntegratedCPP()) return; // Attempt to get a preprocessor action dependence. 
ActionList PreprocessJobOffloadActions; ActionList NewInputs; for (Action *A : Inputs) { auto *PJ = getPrevDependentAction({A}, PreprocessJobOffloadActions); if (!PJ || !isa(PJ)) { NewInputs.push_back(A); continue; } // This is legal to combine. Append any offload action we found and add the // current input to preprocessor inputs. CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(), PreprocessJobOffloadActions.end()); NewInputs.append(PJ->input_begin(), PJ->input_end()); } Inputs = NewInputs; } public: ToolSelector(const JobAction *BaseAction, const ToolChain &TC, const Compilation &C, bool SaveTemps, bool EmbedBitcode) : TC(TC), C(C), BaseAction(BaseAction), SaveTemps(SaveTemps), EmbedBitcode(EmbedBitcode) { assert(BaseAction && "Invalid base action."); IsHostSelector = BaseAction->getOffloadingDeviceKind() == Action::OFK_None; } /// Check if a chain of actions can be combined and return the tool that can /// handle the combination of actions. The pointer to the current inputs \a /// Inputs and the list of offload actions \a CollapsedOffloadActions /// connected to collapsed actions are updated accordingly. The latter enables /// the caller of the selector to process them afterwards instead of just /// dropping them. If no suitable tool is found, null will be returned. const Tool *getTool(ActionList &Inputs, ActionList &CollapsedOffloadAction) { // // Get the largest chain of actions that we could combine. // SmallVector ActionChain(1); ActionChain.back().JA = BaseAction; while (ActionChain.back().JA) { const Action *CurAction = ActionChain.back().JA; // Grow the chain by one element. ActionChain.resize(ActionChain.size() + 1); JobActionInfo &AI = ActionChain.back(); // Attempt to fill it with the AI.JA = getPrevDependentAction(CurAction->getInputs(), AI.SavedOffloadAction); } // Pop the last action info as it could not be filled. ActionChain.pop_back(); // // Attempt to combine actions. If all combining attempts failed, just return // the tool of the provided action. At the end we attempt to combine the // action with any preprocessor action it may depend on. // const Tool *T = combineAssembleBackendCompile(ActionChain, Inputs, CollapsedOffloadAction); if (!T) T = combineAssembleBackend(ActionChain, Inputs, CollapsedOffloadAction); if (!T) T = combineBackendCompile(ActionChain, Inputs, CollapsedOffloadAction); if (!T) { Inputs = BaseAction->getInputs(); T = TC.SelectTool(*BaseAction); } combineWithPreprocessor(T, Inputs, CollapsedOffloadAction); return T; } }; } /// Return a string that uniquely identifies the result of a job. The bound arch /// is not necessarily represented in the toolchain's triple -- for example, /// armv7 and armv7s both map to the same triple -- so we need both in our map. /// Also, we need to add the offloading device kind, as the same tool chain can /// be used for host and device for some programming models, e.g. OpenMP. 
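// ---------------------------------------------------------------------------
// Illustrative sketch only, not part of the driver: a stand-in showing why the
// cache key described above combines the normalized triple, the bound arch and
// the offload kind. The concrete strings are invented examples, not values
// captured from the driver.
#include <string>

static std::string toyTriplePlusArchKey(const std::string &NormalizedTriple,
                                        const std::string &BoundArch,
                                        const std::string &OffloadKindName) {
  std::string Key = NormalizedTriple;
  if (!BoundArch.empty())
    Key += "-" + BoundArch;
  return Key + "-" + OffloadKindName;
}

static void toyTriplePlusArchKeyExample() {
  // armv7 and armv7s may share a normalized triple, so the bound arch keeps
  // their results distinct in the cache...
  std::string KeyA = toyTriplePlusArchKey("thumbv7-apple-ios", "armv7", "none");
  std::string KeyB =
      toyTriplePlusArchKey("thumbv7-apple-ios", "armv7s", "none");
  // ...and the offload kind separates host and device results when the same
  // toolchain serves both, e.g. OpenMP offloading to the host triple.
  std::string KeyC =
      toyTriplePlusArchKey("x86_64-unknown-linux-gnu", "", "none");
  std::string KeyD =
      toyTriplePlusArchKey("x86_64-unknown-linux-gnu", "", "openmp");
  (void)KeyA; (void)KeyB; (void)KeyC; (void)KeyD;
}
// ---------------------------------------------------------------------------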
static std::string GetTriplePlusArchString(const ToolChain *TC, StringRef BoundArch, Action::OffloadKind OffloadKind) { std::string TriplePlusArch = TC->getTriple().normalize(); if (!BoundArch.empty()) { TriplePlusArch += "-"; TriplePlusArch += BoundArch; } TriplePlusArch += "-"; TriplePlusArch += Action::GetOffloadKindName(OffloadKind); return TriplePlusArch; } InputInfoList Driver::BuildJobsForAction( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map, InputInfoList> &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const { std::pair ActionTC = { A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; auto CachedResult = CachedResults.find(ActionTC); if (CachedResult != CachedResults.end()) { return CachedResult->second; } InputInfoList Result = BuildJobsForActionNoCache( C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput, CachedResults, TargetDeviceOffloadKind); CachedResults[ActionTC] = Result; return Result; } static void handleTimeTrace(Compilation &C, const ArgList &Args, const JobAction *JA, const char *BaseInput, const InputInfo &Result) { Arg *A = Args.getLastArg(options::OPT_ftime_trace, options::OPT_ftime_trace_EQ); if (!A) return; SmallString<128> Path; if (A->getOption().matches(options::OPT_ftime_trace_EQ)) { Path = A->getValue(); if (llvm::sys::fs::is_directory(Path)) { SmallString<128> Tmp(Result.getFilename()); llvm::sys::path::replace_extension(Tmp, "json"); llvm::sys::path::append(Path, llvm::sys::path::filename(Tmp)); } } else { if (Arg *DumpDir = Args.getLastArgNoClaim(options::OPT_dumpdir)) { // The trace file is ${dumpdir}${basename}.json. Note that dumpdir may not // end with a path separator. Path = DumpDir->getValue(); Path += llvm::sys::path::filename(BaseInput); } else { Path = Result.getFilename(); } llvm::sys::path::replace_extension(Path, "json"); } const char *ResultFile = C.getArgs().MakeArgString(Path); C.addTimeTraceFile(ResultFile, JA); C.addResultFile(ResultFile, JA); } InputInfoList Driver::BuildJobsForActionNoCache( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map, InputInfoList> &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); InputInfoList OffloadDependencesInputInfo; bool BuildingForOffloadDevice = TargetDeviceOffloadKind != Action::OFK_None; if (const OffloadAction *OA = dyn_cast(A)) { // The 'Darwin' toolchain is initialized only when its arguments are // computed. Get the default arguments for OFK_None to ensure that // initialization is performed before processing the offload action. // FIXME: Remove when darwin's toolchain is initialized during construction. C.getArgsForToolChain(TC, BoundArch, Action::OFK_None); // The offload action is expected to be used in four different situations. // // a) Set a toolchain/architecture/kind for a host action: // Host Action 1 -> OffloadAction -> Host Action 2 // // b) Set a toolchain/architecture/kind for a device action; // Device Action 1 -> OffloadAction -> Device Action 2 // // c) Specify a device dependence to a host action; // Device Action 1 _ // \ // Host Action 1 ---> OffloadAction -> Host Action 2 // // d) Specify a host dependence to a device action. 
// Host Action 1 _ // \ // Device Action 1 ---> OffloadAction -> Device Action 2 // // For a) and b), we just return the job generated for the dependences. For // c) and d) we override the current action with the host/device dependence // if the current toolchain is host/device and set the offload dependences // info with the jobs obtained from the device/host dependence(s). // If there is a single device option or has no host action, just generate // the job for it. if (OA->hasSingleDeviceDependence() || !OA->hasHostDependence()) { InputInfoList DevA; OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { DevA.append(BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel, /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); }); return DevA; } // If 'Action 2' is host, we generate jobs for the device dependences and // override the current action with the host dependence. Otherwise, we // generate the host dependences and override the action with the device // dependence. The dependences can't therefore be a top-level action. OA->doOnEachDependence( /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { OffloadDependencesInputInfo.append(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false, /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); }); A = BuildingForOffloadDevice ? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) : OA->getHostDependence(); // We may have already built this action as a part of the offloading // toolchain, return the cached input if so. std::pair ActionTC = { OA->getHostDependence(), GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; if (CachedResults.find(ActionTC) != CachedResults.end()) { InputInfoList Inputs = CachedResults[ActionTC]; Inputs.append(OffloadDependencesInputInfo); return Inputs; } } if (const InputAction *IA = dyn_cast(A)) { // FIXME: It would be nice to not claim this here; maybe the old scheme of // just using Args was better? const Arg &Input = IA->getInputArg(); Input.claim(); if (Input.getOption().matches(options::OPT_INPUT)) { const char *Name = Input.getValue(); return {InputInfo(A, Name, /* _BaseInput = */ Name)}; } return {InputInfo(A, &Input, /* _BaseInput = */ "")}; } if (const BindArchAction *BAA = dyn_cast(A)) { const ToolChain *TC; StringRef ArchName = BAA->getArchName(); if (!ArchName.empty()) TC = &getToolChain(C.getArgs(), computeTargetTriple(*this, TargetTriple, C.getArgs(), ArchName)); else TC = &C.getDefaultToolChain(); return BuildJobsForAction(C, *BAA->input_begin(), TC, ArchName, AtTopLevel, MultipleArchs, LinkingOutput, CachedResults, TargetDeviceOffloadKind); } ActionList Inputs = A->getInputs(); const JobAction *JA = cast(A); ActionList CollapsedOffloadActions; ToolSelector TS(JA, *TC, C, isSaveTempsEnabled(), embedBitcodeInObject() && !isUsingLTO()); const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions); if (!T) return {InputInfo()}; // If we've collapsed action list that contained OffloadAction we // need to build jobs for host/device-side inputs it may have held. 
for (const auto *OA : CollapsedOffloadActions) cast(OA)->doOnEachDependence( /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { OffloadDependencesInputInfo.append(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false, /*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); }); // Only use pipes when there is exactly one input. InputInfoList InputInfos; for (const Action *Input : Inputs) { // Treat dsymutil and verify sub-jobs as being at the top-level too, they // shouldn't get temporary output names. // FIXME: Clean this up. bool SubJobAtTopLevel = AtTopLevel && (isa(A) || isa(A)); InputInfos.append(BuildJobsForAction( C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput, CachedResults, A->getOffloadingDeviceKind())); } // Always use the first file input as the base input. const char *BaseInput = InputInfos[0].getBaseInput(); for (auto &Info : InputInfos) { if (Info.isFilename()) { BaseInput = Info.getBaseInput(); break; } } // ... except dsymutil actions, which use their actual input as the base // input. if (JA->getType() == types::TY_dSYM) BaseInput = InputInfos[0].getFilename(); // Append outputs of offload device jobs to the input list if (!OffloadDependencesInputInfo.empty()) InputInfos.append(OffloadDependencesInputInfo.begin(), OffloadDependencesInputInfo.end()); // Set the effective triple of the toolchain for the duration of this job. llvm::Triple EffectiveTriple; const ToolChain &ToolTC = T->getToolChain(); const ArgList &Args = C.getArgsForToolChain(TC, BoundArch, A->getOffloadingDeviceKind()); if (InputInfos.size() != 1) { EffectiveTriple = llvm::Triple(ToolTC.ComputeEffectiveClangTriple(Args)); } else { // Pass along the input type if it can be unambiguously determined. EffectiveTriple = llvm::Triple( ToolTC.ComputeEffectiveClangTriple(Args, InputInfos[0].getType())); } RegisterEffectiveTriple TripleRAII(ToolTC, EffectiveTriple); // Determine the place to write output to, if any. InputInfo Result; InputInfoList UnbundlingResults; if (auto *UA = dyn_cast(JA)) { // If we have an unbundling job, we need to create results for all the // outputs. We also update the results cache so that other actions using // this unbundling action can get the right results. for (auto &UI : UA->getDependentActionsInfo()) { assert(UI.DependentOffloadKind != Action::OFK_None && "Unbundling with no offloading??"); // Unbundling actions are never at the top level. When we generate the // offloading prefix, we also do that for the host file because the // unbundling action does not change the type of the output which can // cause a overwrite. std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix( UI.DependentOffloadKind, UI.DependentToolChain->getTriple().normalize(), /*CreatePrefixForHost=*/true); auto CurI = InputInfo( UA, GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch, /*AtTopLevel=*/false, MultipleArchs || UI.DependentOffloadKind == Action::OFK_HIP, OffloadingPrefix), BaseInput); // Save the unbundling result. UnbundlingResults.push_back(CurI); // Get the unique string identifier for this dependence and cache the // result. 
StringRef Arch; if (TargetDeviceOffloadKind == Action::OFK_HIP) { if (UI.DependentOffloadKind == Action::OFK_Host) Arch = StringRef(); else Arch = UI.DependentBoundArch; } else Arch = BoundArch; CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch, UI.DependentOffloadKind)}] = { CurI}; } // Now that we have all the results generated, select the one that should be // returned for the current depending action. std::pair ActionTC = { A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; assert(CachedResults.find(ActionTC) != CachedResults.end() && "Result does not exist??"); Result = CachedResults[ActionTC].front(); } else if (JA->getType() == types::TY_Nothing) Result = {InputInfo(A, BaseInput)}; else { // We only have to generate a prefix for the host if this is not a top-level // action. std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix( A->getOffloadingDeviceKind(), TC->getTriple().normalize(), /*CreatePrefixForHost=*/isa(A) || !(A->getOffloadingHostActiveKinds() == Action::OFK_None || AtTopLevel)); Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel, MultipleArchs, OffloadingPrefix), BaseInput); if (T->canEmitIR() && OffloadingPrefix.empty()) handleTimeTrace(C, Args, JA, BaseInput, Result); } if (CCCPrintBindings && !CCGenDiagnostics) { llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"' << " - \"" << T->getName() << "\", inputs: ["; for (unsigned i = 0, e = InputInfos.size(); i != e; ++i) { llvm::errs() << InputInfos[i].getAsString(); if (i + 1 != e) llvm::errs() << ", "; } if (UnbundlingResults.empty()) llvm::errs() << "], output: " << Result.getAsString() << "\n"; else { llvm::errs() << "], outputs: ["; for (unsigned i = 0, e = UnbundlingResults.size(); i != e; ++i) { llvm::errs() << UnbundlingResults[i].getAsString(); if (i + 1 != e) llvm::errs() << ", "; } llvm::errs() << "] \n"; } } else { if (UnbundlingResults.empty()) T->ConstructJob( C, *JA, Result, InputInfos, C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), LinkingOutput); else T->ConstructJobMultipleOutputs( C, *JA, UnbundlingResults, InputInfos, C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), LinkingOutput); } return {Result}; } const char *Driver::getDefaultImageName() const { llvm::Triple Target(llvm::Triple::normalize(TargetTriple)); return Target.isOSWindows() ? "a.exe" : "a.out"; } /// Create output filename based on ArgValue, which could either be a /// full filename, filename without extension, or a directory. If ArgValue /// does not provide a filename, then use BaseName, and use the extension /// suitable for FileType. static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue, StringRef BaseName, types::ID FileType) { SmallString<128> Filename = ArgValue; if (ArgValue.empty()) { // If the argument is empty, output to BaseName in the current dir. Filename = BaseName; } else if (llvm::sys::path::is_separator(Filename.back())) { // If the argument is a directory, output to BaseName in that dir. llvm::sys::path::append(Filename, BaseName); } if (!llvm::sys::path::has_extension(ArgValue)) { // If the argument didn't provide an extension, then set it. const char *Extension = types::getTypeTempSuffix(FileType, true); if (FileType == types::TY_Image && Args.hasArg(options::OPT__SLASH_LD, options::OPT__SLASH_LDd)) { // The output file is a dll. 
Extension = "dll"; } llvm::sys::path::replace_extension(Filename, Extension); } return Args.MakeArgString(Filename.c_str()); } static bool HasPreprocessOutput(const Action &JA) { if (isa(JA)) return true; if (isa(JA) && isa(JA.getInputs()[0])) return true; if (isa(JA) && HasPreprocessOutput(*(JA.getInputs()[0]))) return true; return false; } const char *Driver::CreateTempFile(Compilation &C, StringRef Prefix, StringRef Suffix, bool MultipleArchs, StringRef BoundArch, bool NeedUniqueDirectory) const { SmallString<128> TmpName; Arg *A = C.getArgs().getLastArg(options::OPT_fcrash_diagnostics_dir); std::optional CrashDirectory = CCGenDiagnostics && A ? std::string(A->getValue()) : llvm::sys::Process::GetEnv("CLANG_CRASH_DIAGNOSTICS_DIR"); if (CrashDirectory) { if (!getVFS().exists(*CrashDirectory)) llvm::sys::fs::create_directories(*CrashDirectory); SmallString<128> Path(*CrashDirectory); llvm::sys::path::append(Path, Prefix); const char *Middle = !Suffix.empty() ? "-%%%%%%." : "-%%%%%%"; if (std::error_code EC = llvm::sys::fs::createUniqueFile(Path + Middle + Suffix, TmpName)) { Diag(clang::diag::err_unable_to_make_temp) << EC.message(); return ""; } } else { if (MultipleArchs && !BoundArch.empty()) { if (NeedUniqueDirectory) { TmpName = GetTemporaryDirectory(Prefix); llvm::sys::path::append(TmpName, Twine(Prefix) + "-" + BoundArch + "." + Suffix); } else { TmpName = GetTemporaryPath((Twine(Prefix) + "-" + BoundArch).str(), Suffix); } } else { TmpName = GetTemporaryPath(Prefix, Suffix); } } return C.addTempFile(C.getArgs().MakeArgString(TmpName)); } // Calculate the output path of the module file when compiling a module unit // with the `-fmodule-output` option or `-fmodule-output=` option specified. // The behavior is: // - If `-fmodule-output=` is specfied, then the module file is // writing to the value. // - Otherwise if the output object file of the module unit is specified, the // output path // of the module file should be the same with the output object file except // the corresponding suffix. This requires both `-o` and `-c` are specified. // - Otherwise, the output path of the module file will be the same with the // input with the corresponding suffix. static const char *GetModuleOutputPath(Compilation &C, const JobAction &JA, const char *BaseInput) { assert(isa(JA) && JA.getType() == types::TY_ModuleFile && (C.getArgs().hasArg(options::OPT_fmodule_output) || C.getArgs().hasArg(options::OPT_fmodule_output_EQ))); SmallString<256> OutputPath = tools::getCXX20NamedModuleOutputPath(C.getArgs(), BaseInput); return C.addResultFile(C.getArgs().MakeArgString(OutputPath.c_str()), &JA); } const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, const char *BaseInput, StringRef OrigBoundArch, bool AtTopLevel, bool MultipleArchs, StringRef OffloadingPrefix) const { std::string BoundArch = OrigBoundArch.str(); if (is_style_windows(llvm::sys::path::Style::native)) { // BoundArch may contains ':', which is invalid in file names on Windows, // therefore replace it with '%'. std::replace(BoundArch.begin(), BoundArch.end(), ':', '@'); } llvm::PrettyStackTraceString CrashInfo("Computing output path"); // Output to a user requested destination? if (AtTopLevel && !isa(JA) && !isa(JA)) { if (Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o)) return C.addResultFile(FinalOutput->getValue(), &JA); } // For /P, preprocess to file named after BaseInput. 
if (C.getArgs().hasArg(options::OPT__SLASH_P)) { assert(AtTopLevel && isa(JA)); StringRef BaseName = llvm::sys::path::filename(BaseInput); StringRef NameArg; if (Arg *A = C.getArgs().getLastArg(options::OPT__SLASH_Fi)) NameArg = A->getValue(); return C.addResultFile( MakeCLOutputFilename(C.getArgs(), NameArg, BaseName, types::TY_PP_C), &JA); } // Default to writing to stdout? if (AtTopLevel && !CCGenDiagnostics && HasPreprocessOutput(JA)) { return "-"; } if (JA.getType() == types::TY_ModuleFile && C.getArgs().getLastArg(options::OPT_module_file_info)) { return "-"; } if (JA.getType() == types::TY_PP_Asm && C.getArgs().hasArg(options::OPT_dxc_Fc)) { StringRef FcValue = C.getArgs().getLastArgValue(options::OPT_dxc_Fc); // TODO: Should we use `MakeCLOutputFilename` here? If so, we can probably // handle this as part of the SLASH_Fa handling below. return C.addResultFile(C.getArgs().MakeArgString(FcValue.str()), &JA); } if (JA.getType() == types::TY_Object && C.getArgs().hasArg(options::OPT_dxc_Fo)) { StringRef FoValue = C.getArgs().getLastArgValue(options::OPT_dxc_Fo); // TODO: Should we use `MakeCLOutputFilename` here? If so, we can probably // handle this as part of the SLASH_Fo handling below. return C.addResultFile(C.getArgs().MakeArgString(FoValue.str()), &JA); } // Is this the assembly listing for /FA? if (JA.getType() == types::TY_PP_Asm && (C.getArgs().hasArg(options::OPT__SLASH_FA) || C.getArgs().hasArg(options::OPT__SLASH_Fa))) { // Use /Fa and the input filename to determine the asm file name. StringRef BaseName = llvm::sys::path::filename(BaseInput); StringRef FaValue = C.getArgs().getLastArgValue(options::OPT__SLASH_Fa); return C.addResultFile( MakeCLOutputFilename(C.getArgs(), FaValue, BaseName, JA.getType()), &JA); } if (JA.getType() == types::TY_API_INFO && C.getArgs().hasArg(options::OPT_emit_extension_symbol_graphs) && C.getArgs().hasArg(options::OPT_o)) Diag(clang::diag::err_drv_unexpected_symbol_graph_output) << C.getArgs().getLastArgValue(options::OPT_o); // DXC defaults to standard out when generating assembly. We check this after // any DXC flags that might specify a file. if (AtTopLevel && JA.getType() == types::TY_PP_Asm && IsDXCMode()) return "-"; bool SpecifiedModuleOutput = C.getArgs().hasArg(options::OPT_fmodule_output) || C.getArgs().hasArg(options::OPT_fmodule_output_EQ); if (MultipleArchs && SpecifiedModuleOutput) Diag(clang::diag::err_drv_module_output_with_multiple_arch); // If we're emitting a module output with the specified option // `-fmodule-output`. if (!AtTopLevel && isa(JA) && JA.getType() == types::TY_ModuleFile && SpecifiedModuleOutput) { assert(!C.getArgs().hasArg(options::OPT_modules_reduced_bmi)); return GetModuleOutputPath(C, JA, BaseInput); } // Output to a temporary file? if ((!AtTopLevel && !isSaveTempsEnabled() && !C.getArgs().hasArg(options::OPT__SLASH_Fo)) || CCGenDiagnostics) { StringRef Name = llvm::sys::path::filename(BaseInput); std::pair Split = Name.split('.'); const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode() || IsDXCMode()); // The non-offloading toolchain on Darwin requires deterministic input // file name for binaries to be deterministic, therefore it needs unique // directory. 
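// [Editorial illustration, not part of the upstream sources.] Assuming a
// hypothetical Prefix "foo", Suffix "o" and BoundArch "armv7k", the
// CreateTempFile() call below produces names of roughly this shape:
//   MultipleArchs && NeedUniqueDirectory: <unique dir from "foo">/foo-armv7k.o
//   MultipleArchs, shared temp directory: foo-armv7k-<random>.o
//   otherwise:                            foo-<random>.o
// so per-arch sub-jobs for the same input never collide on a temporary name.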
llvm::Triple Triple(C.getDriver().getTargetTriple()); bool NeedUniqueDirectory = (JA.getOffloadingDeviceKind() == Action::OFK_None || JA.getOffloadingDeviceKind() == Action::OFK_Host) && Triple.isOSDarwin(); return CreateTempFile(C, Split.first, Suffix, MultipleArchs, BoundArch, NeedUniqueDirectory); } SmallString<128> BasePath(BaseInput); SmallString<128> ExternalPath(""); StringRef BaseName; // Dsymutil actions should use the full path. if (isa(JA) && C.getArgs().hasArg(options::OPT_dsym_dir)) { ExternalPath += C.getArgs().getLastArg(options::OPT_dsym_dir)->getValue(); // We use posix style here because the tests (specifically // darwin-dsymutil.c) demonstrate that posix style paths are acceptable // even on Windows and if we don't then the similar test covering this // fails. llvm::sys::path::append(ExternalPath, llvm::sys::path::Style::posix, llvm::sys::path::filename(BasePath)); BaseName = ExternalPath; } else if (isa(JA) || isa(JA)) BaseName = BasePath; else BaseName = llvm::sys::path::filename(BasePath); // Determine what the derived output name should be. const char *NamedOutput; if ((JA.getType() == types::TY_Object || JA.getType() == types::TY_LTO_BC) && C.getArgs().hasArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)) { // The /Fo or /o flag decides the object filename. StringRef Val = C.getArgs() .getLastArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o) ->getValue(); NamedOutput = MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object); } else if (JA.getType() == types::TY_Image && C.getArgs().hasArg(options::OPT__SLASH_Fe, options::OPT__SLASH_o)) { // The /Fe or /o flag names the linked file. StringRef Val = C.getArgs() .getLastArg(options::OPT__SLASH_Fe, options::OPT__SLASH_o) ->getValue(); NamedOutput = MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Image); } else if (JA.getType() == types::TY_Image) { if (IsCLMode()) { // clang-cl uses BaseName for the executable name. NamedOutput = MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image); } else { SmallString<128> Output(getDefaultImageName()); // HIP image for device compilation with -fno-gpu-rdc is per compilation // unit. 
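// [Editorial illustration, not part of the upstream sources.] For a HIP
// device image built with -fno-gpu-rdc, the branch below strips the input's
// extension and re-appends ".out", so a hypothetical "foo.hip" compiled for
// gfx906 (with multiple offload archs) is named along the lines of
// "foo-<offloading prefix>-gfx906.out", where the exact prefix comes from
// Action::GetOffloadingFileNamePrefix(), rather than the default image name.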
bool IsHIPNoRDC = JA.getOffloadingDeviceKind() == Action::OFK_HIP && !C.getArgs().hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false); bool UseOutExtension = IsHIPNoRDC || isa(JA); if (UseOutExtension) { Output = BaseName; llvm::sys::path::replace_extension(Output, ""); } Output += OffloadingPrefix; if (MultipleArchs && !BoundArch.empty()) { Output += "-"; Output.append(BoundArch); } if (UseOutExtension) Output += ".out"; NamedOutput = C.getArgs().MakeArgString(Output.c_str()); } } else if (JA.getType() == types::TY_PCH && IsCLMode()) { NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName)); } else if ((JA.getType() == types::TY_Plist || JA.getType() == types::TY_AST) && C.getArgs().hasArg(options::OPT__SLASH_o)) { StringRef Val = C.getArgs() .getLastArg(options::OPT__SLASH_o) ->getValue(); NamedOutput = MakeCLOutputFilename(C.getArgs(), Val, BaseName, types::TY_Object); } else { const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode() || IsDXCMode()); assert(Suffix && "All types used for output should have a suffix."); std::string::size_type End = std::string::npos; if (!types::appendSuffixForType(JA.getType())) End = BaseName.rfind('.'); SmallString<128> Suffixed(BaseName.substr(0, End)); Suffixed += OffloadingPrefix; if (MultipleArchs && !BoundArch.empty()) { Suffixed += "-"; Suffixed.append(BoundArch); } // When using both -save-temps and -emit-llvm, use a ".tmp.bc" suffix for // the unoptimized bitcode so that it does not get overwritten by the ".bc" // optimized bitcode output. auto IsAMDRDCInCompilePhase = [](const JobAction &JA, const llvm::opt::DerivedArgList &Args) { // The relocatable compilation in HIP and OpenMP implies -emit-llvm. // Similarly, use a ".tmp.bc" suffix for the unoptimized bitcode // (generated in the compile phase.) const ToolChain *TC = JA.getOffloadingToolChain(); return isa(JA) && ((JA.getOffloadingDeviceKind() == Action::OFK_HIP && Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) || (JA.getOffloadingDeviceKind() == Action::OFK_OpenMP && TC && TC->getTriple().isAMDGPU())); }; if (!AtTopLevel && JA.getType() == types::TY_LLVM_BC && (C.getArgs().hasArg(options::OPT_emit_llvm) || IsAMDRDCInCompilePhase(JA, C.getArgs()))) Suffixed += ".tmp"; Suffixed += '.'; Suffixed += Suffix; NamedOutput = C.getArgs().MakeArgString(Suffixed.c_str()); } // Prepend object file path if -save-temps=obj if (!AtTopLevel && isSaveTempsObj() && C.getArgs().hasArg(options::OPT_o) && JA.getType() != types::TY_PCH) { Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o); SmallString<128> TempPath(FinalOutput->getValue()); llvm::sys::path::remove_filename(TempPath); StringRef OutputFileName = llvm::sys::path::filename(NamedOutput); llvm::sys::path::append(TempPath, OutputFileName); NamedOutput = C.getArgs().MakeArgString(TempPath.c_str()); } // If we're saving temps and the temp file conflicts with the input file, // then avoid overwriting input file. if (!AtTopLevel && isSaveTempsEnabled() && NamedOutput == BaseName) { bool SameFile = false; SmallString<256> Result; llvm::sys::fs::current_path(Result); llvm::sys::path::append(Result, BaseName); llvm::sys::fs::equivalent(BaseInput, Result.c_str(), SameFile); // Must share the same path to conflict. 
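// [Editorial illustration, not part of the upstream sources.] The guard below
// covers the corner case where a -save-temps intermediate would get the same
// name as the input itself (say, an input already called "foo.i" whose
// preprocessed temporary would also be derived as "foo.i" in the same
// directory): the equivalence check succeeds and a fresh name from
// GetTemporaryPath() is used instead of clobbering the input.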
if (SameFile) { StringRef Name = llvm::sys::path::filename(BaseInput); std::pair Split = Name.split('.'); std::string TmpName = GetTemporaryPath( Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode() || IsDXCMode())); return C.addTempFile(C.getArgs().MakeArgString(TmpName)); } } // As an annoying special case, PCH generation doesn't strip the pathname. if (JA.getType() == types::TY_PCH && !IsCLMode()) { llvm::sys::path::remove_filename(BasePath); if (BasePath.empty()) BasePath = NamedOutput; else llvm::sys::path::append(BasePath, NamedOutput); return C.addResultFile(C.getArgs().MakeArgString(BasePath.c_str()), &JA); } return C.addResultFile(NamedOutput, &JA); } std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { // Search for Name in a list of paths. auto SearchPaths = [&](const llvm::SmallVectorImpl &P) -> std::optional { // Respect a limited subset of the '-Bprefix' functionality in GCC by // attempting to use this prefix when looking for file paths. for (const auto &Dir : P) { if (Dir.empty()) continue; SmallString<128> P(Dir[0] == '=' ? SysRoot + Dir.substr(1) : Dir); llvm::sys::path::append(P, Name); if (llvm::sys::fs::exists(Twine(P))) return std::string(P); } return std::nullopt; }; if (auto P = SearchPaths(PrefixDirs)) return *P; SmallString<128> R(ResourceDir); llvm::sys::path::append(R, Name); if (llvm::sys::fs::exists(Twine(R))) return std::string(R); SmallString<128> P(TC.getCompilerRTPath()); llvm::sys::path::append(P, Name); if (llvm::sys::fs::exists(Twine(P))) return std::string(P); SmallString<128> D(Dir); llvm::sys::path::append(D, "..", Name); if (llvm::sys::fs::exists(Twine(D))) return std::string(D); if (auto P = SearchPaths(TC.getLibraryPaths())) return *P; if (auto P = SearchPaths(TC.getFilePaths())) return *P; return std::string(Name); } void Driver::generatePrefixedToolNames( StringRef Tool, const ToolChain &TC, SmallVectorImpl &Names) const { // FIXME: Needs a better variable than TargetTriple Names.emplace_back((TargetTriple + "-" + Tool).str()); Names.emplace_back(Tool); } static bool ScanDirForExecutable(SmallString<128> &Dir, StringRef Name) { llvm::sys::path::append(Dir, Name); if (llvm::sys::fs::can_execute(Twine(Dir))) return true; llvm::sys::path::remove_filename(Dir); return false; } std::string Driver::GetProgramPath(StringRef Name, const ToolChain &TC) const { SmallVector TargetSpecificExecutables; generatePrefixedToolNames(Name, TC, TargetSpecificExecutables); // Respect a limited subset of the '-Bprefix' functionality in GCC by // attempting to use this prefix when looking for program paths. for (const auto &PrefixDir : PrefixDirs) { if (llvm::sys::fs::is_directory(PrefixDir)) { SmallString<128> P(PrefixDir); if (ScanDirForExecutable(P, Name)) return std::string(P); } else { SmallString<128> P((PrefixDir + Name).str()); if (llvm::sys::fs::can_execute(Twine(P))) return std::string(P); } } const ToolChain::path_list &List = TC.getProgramPaths(); for (const auto &TargetSpecificExecutable : TargetSpecificExecutables) { // For each possible name of the tool look for it in // program paths first, then the path. // Higher priority names will be first, meaning that // a higher priority name in the path will be found // instead of a lower priority name in the program path. // E.g. 
-gcc on the path will be found instead // of gcc in the program path for (const auto &Path : List) { SmallString<128> P(Path); if (ScanDirForExecutable(P, TargetSpecificExecutable)) return std::string(P); } // Fall back to the path if (llvm::ErrorOr P = llvm::sys::findProgramByName(TargetSpecificExecutable)) return *P; } return std::string(Name); } std::string Driver::GetStdModuleManifestPath(const Compilation &C, const ToolChain &TC) const { std::string error = ""; switch (TC.GetCXXStdlibType(C.getArgs())) { case ToolChain::CST_Libcxx: { auto evaluate = [&](const char *library) -> std::optional { std::string lib = GetFilePath(library, TC); // Note when there are multiple flavours of libc++ the module json needs // to look at the command-line arguments for the proper json. These // flavours do not exist at the moment, but there are plans to provide a // variant that is built with sanitizer instrumentation enabled. // For example // StringRef modules = [&] { // const SanitizerArgs &Sanitize = TC.getSanitizerArgs(C.getArgs()); // if (Sanitize.needsAsanRt()) // return "libc++.modules-asan.json"; // return "libc++.modules.json"; // }(); SmallString<128> path(lib.begin(), lib.end()); llvm::sys::path::remove_filename(path); llvm::sys::path::append(path, "libc++.modules.json"); if (TC.getVFS().exists(path)) return static_cast(path); return {}; }; if (std::optional result = evaluate("libc++.so"); result) return *result; return evaluate("libc++.a").value_or(error); } case ToolChain::CST_Libstdcxx: // libstdc++ does not provide Standard library modules yet. return error; } return error; } std::string Driver::GetTemporaryPath(StringRef Prefix, StringRef Suffix) const { SmallString<128> Path; std::error_code EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, Path); if (EC) { Diag(clang::diag::err_unable_to_make_temp) << EC.message(); return ""; } return std::string(Path); } std::string Driver::GetTemporaryDirectory(StringRef Prefix) const { SmallString<128> Path; std::error_code EC = llvm::sys::fs::createUniqueDirectory(Prefix, Path); if (EC) { Diag(clang::diag::err_unable_to_make_temp) << EC.message(); return ""; } return std::string(Path); } std::string Driver::GetClPchPath(Compilation &C, StringRef BaseName) const { SmallString<128> Output; if (Arg *FpArg = C.getArgs().getLastArg(options::OPT__SLASH_Fp)) { // FIXME: If anybody needs it, implement this obscure rule: // "If you specify a directory without a file name, the default file name // is VCx0.pch., where x is the major version of Visual C++ in use." Output = FpArg->getValue(); // "If you do not specify an extension as part of the path name, an // extension of .pch is assumed. 
" if (!llvm::sys::path::has_extension(Output)) Output += ".pch"; } else { if (Arg *YcArg = C.getArgs().getLastArg(options::OPT__SLASH_Yc)) Output = YcArg->getValue(); if (Output.empty()) Output = BaseName; llvm::sys::path::replace_extension(Output, ".pch"); } return std::string(Output); } const ToolChain &Driver::getToolChain(const ArgList &Args, const llvm::Triple &Target) const { auto &TC = ToolChains[Target.str()]; if (!TC) { switch (Target.getOS()) { case llvm::Triple::AIX: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::Haiku: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::Darwin: case llvm::Triple::MacOSX: case llvm::Triple::IOS: case llvm::Triple::TvOS: case llvm::Triple::WatchOS: case llvm::Triple::XROS: case llvm::Triple::DriverKit: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::DragonFly: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::OpenBSD: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::NetBSD: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::FreeBSD: if (Target.isPPC()) TC = std::make_unique(*this, Target, Args); else TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::Linux: case llvm::Triple::ELFIAMCU: if (Target.getArch() == llvm::Triple::hexagon) TC = std::make_unique(*this, Target, Args); else if ((Target.getVendor() == llvm::Triple::MipsTechnologies) && !Target.hasEnvironment()) TC = std::make_unique(*this, Target, Args); else if (Target.isPPC()) TC = std::make_unique(*this, Target, Args); else if (Target.getArch() == llvm::Triple::ve) TC = std::make_unique(*this, Target, Args); else if (Target.isOHOSFamily()) TC = std::make_unique(*this, Target, Args); else TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::NaCl: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::Fuchsia: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::Solaris: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::CUDA: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::AMDHSA: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::AMDPAL: case llvm::Triple::Mesa3D: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::Win32: switch (Target.getEnvironment()) { default: if (Target.isOSBinFormatELF()) TC = std::make_unique(*this, Target, Args); else if (Target.isOSBinFormatMachO()) TC = std::make_unique(*this, Target, Args); else TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::GNU: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::Itanium: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::MSVC: case llvm::Triple::UnknownEnvironment: if (Args.getLastArgValue(options::OPT_fuse_ld_EQ) .starts_with_insensitive("bfd")) TC = std::make_unique( *this, Target, Args); else TC = std::make_unique(*this, Target, Args); break; } break; case llvm::Triple::PS4: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::PS5: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::Hurd: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::LiteOS: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::ZOS: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::ShaderModel: TC = std::make_unique(*this, Target, Args); break; default: // Of these targets, Hexagon is the only one that might have // an OS of Linux, in 
which case it got handled above already. switch (Target.getArch()) { case llvm::Triple::tce: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::tcele: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::hexagon: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::lanai: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::xcore: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::wasm32: case llvm::Triple::wasm64: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::avr: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::msp430: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::riscv32: case llvm::Triple::riscv64: if (toolchains::RISCVToolChain::hasGCCToolchain(*this, Args)) TC = std::make_unique(*this, Target, Args); else TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::ve: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::spirv32: case llvm::Triple::spirv64: TC = std::make_unique(*this, Target, Args); break; case llvm::Triple::csky: TC = std::make_unique(*this, Target, Args); break; default: if (toolchains::BareMetal::handlesTarget(Target)) TC = std::make_unique(*this, Target, Args); else if (Target.isOSBinFormatELF()) TC = std::make_unique(*this, Target, Args); else if (Target.isOSBinFormatMachO()) TC = std::make_unique(*this, Target, Args); else TC = std::make_unique(*this, Target, Args); } } } return *TC; } const ToolChain &Driver::getOffloadingDeviceToolChain( const ArgList &Args, const llvm::Triple &Target, const ToolChain &HostTC, const Action::OffloadKind &TargetDeviceOffloadKind) const { // Use device / host triples as the key into the ToolChains map because the // device ToolChain we create depends on both. auto &TC = ToolChains[Target.str() + "/" + HostTC.getTriple().str()]; if (!TC) { // Categorized by offload kind > arch rather than OS > arch like // the normal getToolChain call, as it seems a reasonable way to categorize // things. switch (TargetDeviceOffloadKind) { case Action::OFK_HIP: { if (((Target.getArch() == llvm::Triple::amdgcn || Target.getArch() == llvm::Triple::spirv64) && Target.getVendor() == llvm::Triple::AMD && Target.getOS() == llvm::Triple::AMDHSA) || !Args.hasArgNoClaim(options::OPT_offload_EQ)) TC = std::make_unique(*this, Target, HostTC, Args); else if (Target.getArch() == llvm::Triple::spirv64 && Target.getVendor() == llvm::Triple::UnknownVendor && Target.getOS() == llvm::Triple::UnknownOS) TC = std::make_unique(*this, Target, HostTC, Args); break; } default: break; } } return *TC; } bool Driver::ShouldUseClangCompiler(const JobAction &JA) const { // Say "no" if there is not exactly one input of a type clang understands. if (JA.size() != 1 || !types::isAcceptedByClang((*JA.input_begin())->getType())) return false; // And say "no" if this is not a kind of action clang understands. if (!isa(JA) && !isa(JA) && !isa(JA) && !isa(JA) && !isa(JA)) return false; return true; } bool Driver::ShouldUseFlangCompiler(const JobAction &JA) const { // Say "no" if there is not exactly one input of a type flang understands. if (JA.size() != 1 || !types::isAcceptedByFlang((*JA.input_begin())->getType())) return false; // And say "no" if this is not a kind of action flang understands. if (!isa(JA) && !isa(JA) && !isa(JA)) return false; return true; } bool Driver::ShouldEmitStaticLibrary(const ArgList &Args) const { // Only emit static library if the flag is set explicitly. 
if (Args.hasArg(options::OPT_emit_static_lib)) return true; return false; } /// GetReleaseVersion - Parse (([0-9]+)(.([0-9]+)(.([0-9]+)?))?)? and return the /// grouped values as integers. Numbers which are not provided are set to 0. /// /// \return True if the entire string was parsed (9.2), or all groups were /// parsed (10.3.5extrastuff). bool Driver::GetReleaseVersion(StringRef Str, unsigned &Major, unsigned &Minor, unsigned &Micro, bool &HadExtra) { HadExtra = false; Major = Minor = Micro = 0; if (Str.empty()) return false; if (Str.consumeInteger(10, Major)) return false; if (Str.empty()) return true; if (!Str.consume_front(".")) return false; if (Str.consumeInteger(10, Minor)) return false; if (Str.empty()) return true; if (!Str.consume_front(".")) return false; if (Str.consumeInteger(10, Micro)) return false; if (!Str.empty()) HadExtra = true; return true; } /// Parse digits from a string \p Str and fulfill \p Digits with /// the parsed numbers. This method assumes that the max number of /// digits to look for is equal to Digits.size(). /// /// \return True if the entire string was parsed and there are /// no extra characters remaining at the end. bool Driver::GetReleaseVersion(StringRef Str, MutableArrayRef Digits) { if (Str.empty()) return false; unsigned CurDigit = 0; while (CurDigit < Digits.size()) { unsigned Digit; if (Str.consumeInteger(10, Digit)) return false; Digits[CurDigit] = Digit; if (Str.empty()) return true; if (!Str.consume_front(".")) return false; CurDigit++; } // More digits than requested, bail out... return false; } llvm::opt::Visibility Driver::getOptionVisibilityMask(bool UseDriverMode) const { if (!UseDriverMode) return llvm::opt::Visibility(options::ClangOption); if (IsCLMode()) return llvm::opt::Visibility(options::CLOption); if (IsDXCMode()) return llvm::opt::Visibility(options::DXCOption); if (IsFlangMode()) { return llvm::opt::Visibility(options::FlangOption); } return llvm::opt::Visibility(options::ClangOption); } const char *Driver::getExecutableForDriverMode(DriverMode Mode) { switch (Mode) { case GCCMode: return "clang"; case GXXMode: return "clang++"; case CPPMode: return "clang-cpp"; case CLMode: return "clang-cl"; case FlangMode: return "flang"; case DXCMode: return "clang-dxc"; } llvm_unreachable("Unhandled Mode"); } bool clang::driver::isOptimizationLevelFast(const ArgList &Args) { return Args.hasFlag(options::OPT_Ofast, options::OPT_O_Group, false); } bool clang::driver::willEmitRemarks(const ArgList &Args) { // -fsave-optimization-record enables it. if (Args.hasFlag(options::OPT_fsave_optimization_record, options::OPT_fno_save_optimization_record, false)) return true; // -fsave-optimization-record= enables it as well. if (Args.hasFlag(options::OPT_fsave_optimization_record_EQ, options::OPT_fno_save_optimization_record, false)) return true; // -foptimization-record-file alone enables it too. if (Args.hasFlag(options::OPT_foptimization_record_file_EQ, options::OPT_fno_save_optimization_record, false)) return true; // -foptimization-record-passes alone enables it too. 
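// [Editorial illustration, not part of the upstream sources.] Taken together,
// the checks in this function treat remark output as enabled for, e.g.:
//   -fsave-optimization-record                                 -> true
//   -foptimization-record-file=foo.opt.yaml                    -> true
//   -fsave-optimization-record -fno-save-optimization-record   -> false
// (the last flag wins), and as disabled when none of these options appear.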
if (Args.hasFlag(options::OPT_foptimization_record_passes_EQ, options::OPT_fno_save_optimization_record, false)) return true; return false; } llvm::StringRef clang::driver::getDriverMode(StringRef ProgName, ArrayRef Args) { static StringRef OptName = getDriverOptTable().getOption(options::OPT_driver_mode).getPrefixedName(); llvm::StringRef Opt; for (StringRef Arg : Args) { if (!Arg.starts_with(OptName)) continue; Opt = Arg; } if (Opt.empty()) Opt = ToolChain::getTargetAndModeFromProgramName(ProgName).DriverMode; return Opt.consume_front(OptName) ? Opt : ""; } bool driver::IsClangCL(StringRef DriverMode) { return DriverMode == "cl"; } llvm::Error driver::expandResponseFiles(SmallVectorImpl &Args, bool ClangCLMode, llvm::BumpPtrAllocator &Alloc, llvm::vfs::FileSystem *FS) { // Parse response files using the GNU syntax, unless we're in CL mode. There // are two ways to put clang in CL compatibility mode: ProgName is either // clang-cl or cl, or --driver-mode=cl is on the command line. The normal // command line parsing can't happen until after response file parsing, so we // have to manually search for a --driver-mode=cl argument the hard way. // Finally, our -cc1 tools don't care which tokenization mode we use because // response files written by clang will tokenize the same way in either mode. enum { Default, POSIX, Windows } RSPQuoting = Default; for (const char *F : Args) { if (strcmp(F, "--rsp-quoting=posix") == 0) RSPQuoting = POSIX; else if (strcmp(F, "--rsp-quoting=windows") == 0) RSPQuoting = Windows; } // Determines whether we want nullptr markers in Args to indicate response // files end-of-lines. We only use this for the /LINK driver argument with // clang-cl.exe on Windows. bool MarkEOLs = ClangCLMode; llvm::cl::TokenizerCallback Tokenizer; if (RSPQuoting == Windows || (RSPQuoting == Default && ClangCLMode)) Tokenizer = &llvm::cl::TokenizeWindowsCommandLine; else Tokenizer = &llvm::cl::TokenizeGNUCommandLine; if (MarkEOLs && Args.size() > 1 && StringRef(Args[1]).starts_with("-cc1")) MarkEOLs = false; llvm::cl::ExpansionContext ECtx(Alloc, Tokenizer); ECtx.setMarkEOLs(MarkEOLs); if (FS) ECtx.setVFS(FS); if (llvm::Error Err = ECtx.expandResponseFiles(Args)) return Err; // If -cc1 came from a response file, remove the EOL sentinels. auto FirstArg = llvm::find_if(llvm::drop_begin(Args), [](const char *A) { return A != nullptr; }); if (FirstArg != Args.end() && StringRef(*FirstArg).starts_with("-cc1")) { // If -cc1 came from a response file, remove the EOL sentinels. if (MarkEOLs) { auto newEnd = std::remove(Args.begin(), Args.end(), nullptr); Args.resize(newEnd - Args.begin()); } } return llvm::Error::success(); } static const char *GetStableCStr(llvm::StringSet<> &SavedStrings, StringRef S) { return SavedStrings.insert(S).first->getKeyData(); } /// Apply a list of edits to the input argument lists. /// /// The input string is a space separated list of edits to perform, /// they are applied in order to the input argument lists. Edits /// should be one of the following forms: /// /// '#': Silence information about the changes to the command line arguments. /// /// '^': Add FOO as a new argument at the beginning of the command line. /// /// '+': Add FOO as a new argument at the end of the command line. /// /// 's/XXX/YYY/': Substitute the regular expression XXX with YYY in the command /// line. /// /// 'xOPTION': Removes all instances of the literal argument OPTION. /// /// 'XOPTION': Removes all instances of the literal argument OPTION, /// and the following argument. 
/// /// 'Ox': Removes all flags matching 'O' or 'O[sz0-9]' and adds 'Ox' /// at the end of the command line. /// /// \param OS - The stream to write edit information to. /// \param Args - The vector of command line arguments. /// \param Edit - The override command to perform. /// \param SavedStrings - Set to use for storing string representations. static void applyOneOverrideOption(raw_ostream &OS, SmallVectorImpl &Args, StringRef Edit, llvm::StringSet<> &SavedStrings) { // This does not need to be efficient. if (Edit[0] == '^') { const char *Str = GetStableCStr(SavedStrings, Edit.substr(1)); OS << "### Adding argument " << Str << " at beginning\n"; Args.insert(Args.begin() + 1, Str); } else if (Edit[0] == '+') { const char *Str = GetStableCStr(SavedStrings, Edit.substr(1)); OS << "### Adding argument " << Str << " at end\n"; Args.push_back(Str); } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.ends_with("/") && Edit.slice(2, Edit.size() - 1).contains('/')) { StringRef MatchPattern = Edit.substr(2).split('/').first; StringRef ReplPattern = Edit.substr(2).split('/').second; ReplPattern = ReplPattern.slice(0, ReplPattern.size() - 1); for (unsigned i = 1, e = Args.size(); i != e; ++i) { // Ignore end-of-line response file markers if (Args[i] == nullptr) continue; std::string Repl = llvm::Regex(MatchPattern).sub(ReplPattern, Args[i]); if (Repl != Args[i]) { OS << "### Replacing '" << Args[i] << "' with '" << Repl << "'\n"; Args[i] = GetStableCStr(SavedStrings, Repl); } } } else if (Edit[0] == 'x' || Edit[0] == 'X') { auto Option = Edit.substr(1); for (unsigned i = 1; i < Args.size();) { if (Option == Args[i]) { OS << "### Deleting argument " << Args[i] << '\n'; Args.erase(Args.begin() + i); if (Edit[0] == 'X') { if (i < Args.size()) { OS << "### Deleting argument " << Args[i] << '\n'; Args.erase(Args.begin() + i); } else OS << "### Invalid X edit, end of command line!\n"; } } else ++i; } } else if (Edit[0] == 'O') { for (unsigned i = 1; i < Args.size();) { const char *A = Args[i]; // Ignore end-of-line response file markers if (A == nullptr) continue; if (A[0] == '-' && A[1] == 'O' && (A[2] == '\0' || (A[3] == '\0' && (A[2] == 's' || A[2] == 'z' || ('0' <= A[2] && A[2] <= '9'))))) { OS << "### Deleting argument " << Args[i] << '\n'; Args.erase(Args.begin() + i); } else ++i; } OS << "### Adding argument " << Edit << " at end\n"; Args.push_back(GetStableCStr(SavedStrings, '-' + Edit.str())); } else { OS << "### Unrecognized edit: " << Edit << "\n"; } } void driver::applyOverrideOptions(SmallVectorImpl &Args, const char *OverrideStr, llvm::StringSet<> &SavedStrings, raw_ostream *OS) { if (!OS) OS = &llvm::nulls(); if (OverrideStr[0] == '#') { ++OverrideStr; OS = &llvm::nulls(); } *OS << "### CCC_OVERRIDE_OPTIONS: " << OverrideStr << "\n"; // This does not need to be efficient. const char *S = OverrideStr; while (*S) { const char *End = ::strchr(S, ' '); if (!End) End = S + strlen(S); if (End != S) applyOneOverrideOption(*OS, Args, std::string(S, End), SavedStrings); S = End; if (*S != '\0') ++S; } } diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/ARM.cpp index a6041b809b80..0489911ecd9d 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/ARM.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Arch/ARM.cpp @@ -1,1087 +1,1094 @@ //===--- ARM.cpp - ARM (not AArch64) Helpers for Tools ----------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "ARM.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/TargetParser/ARMTargetParser.h" #include "llvm/TargetParser/Host.h" using namespace clang::driver; using namespace clang::driver::tools; using namespace clang; using namespace llvm::opt; // Get SubArch (vN). int arm::getARMSubArchVersionNumber(const llvm::Triple &Triple) { llvm::StringRef Arch = Triple.getArchName(); return llvm::ARM::parseArchVersion(Arch); } // True if M-profile. bool arm::isARMMProfile(const llvm::Triple &Triple) { llvm::StringRef Arch = Triple.getArchName(); return llvm::ARM::parseArchProfile(Arch) == llvm::ARM::ProfileKind::M; } // On Arm the endianness of the output file is determined by the target and // can be overridden by the pseudo-target flags '-mlittle-endian'/'-EL' and // '-mbig-endian'/'-EB'. Unlike other targets the flag does not result in a // normalized triple so we must handle the flag here. bool arm::isARMBigEndian(const llvm::Triple &Triple, const ArgList &Args) { if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian, options::OPT_mbig_endian)) { return !A->getOption().matches(options::OPT_mlittle_endian); } return Triple.getArch() == llvm::Triple::armeb || Triple.getArch() == llvm::Triple::thumbeb; } // True if A-profile. bool arm::isARMAProfile(const llvm::Triple &Triple) { llvm::StringRef Arch = Triple.getArchName(); return llvm::ARM::parseArchProfile(Arch) == llvm::ARM::ProfileKind::A; } // Get Arch/CPU from args. void arm::getARMArchCPUFromArgs(const ArgList &Args, llvm::StringRef &Arch, llvm::StringRef &CPU, bool FromAs) { if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mcpu_EQ)) CPU = A->getValue(); if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) Arch = A->getValue(); if (!FromAs) return; for (const Arg *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { // Use getValues because -Wa can have multiple arguments // e.g. -Wa,-mcpu=foo,-mcpu=bar for (StringRef Value : A->getValues()) { if (Value.starts_with("-mcpu=")) CPU = Value.substr(6); if (Value.starts_with("-march=")) Arch = Value.substr(7); } } } // Handle -mhwdiv=. // FIXME: Use ARMTargetParser. static void getARMHWDivFeatures(const Driver &D, const Arg *A, const ArgList &Args, StringRef HWDiv, std::vector &Features) { uint64_t HWDivID = llvm::ARM::parseHWDiv(HWDiv); if (!llvm::ARM::getHWDivFeatures(HWDivID, Features)) D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args); } // Handle -mfpu=. static llvm::ARM::FPUKind getARMFPUFeatures(const Driver &D, const Arg *A, const ArgList &Args, StringRef FPU, std::vector &Features) { llvm::ARM::FPUKind FPUKind = llvm::ARM::parseFPU(FPU); if (!llvm::ARM::getFPUFeatures(FPUKind, Features)) D.Diag(clang::diag::err_drv_clang_unsupported) << A->getAsString(Args); return FPUKind; } // Decode ARM features from string like +[no]featureA+[no]featureB+... 
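// [Editorial illustration, not part of the upstream sources.] The text handed
// to DecodeARMFeatures() is everything after the first '+' of -march/-mcpu;
// e.g. for a hypothetical "-march=armv8.1-m.main+mve+nofp" the string
// "mve+nofp" is split into {"mve", "nofp"} and each piece is passed to
// appendArchExtFeatures(), which expands it into "+feature"/"-feature"
// entries (or fails, making the whole option argument invalid).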
static bool DecodeARMFeatures(const Driver &D, StringRef text, StringRef CPU, llvm::ARM::ArchKind ArchKind, std::vector &Features, llvm::ARM::FPUKind &ArgFPUKind) { SmallVector Split; text.split(Split, StringRef("+"), -1, false); for (StringRef Feature : Split) { if (!appendArchExtFeatures(CPU, ArchKind, Feature, Features, ArgFPUKind)) return false; } return true; } static void DecodeARMFeaturesFromCPU(const Driver &D, StringRef CPU, std::vector &Features) { CPU = CPU.split("+").first; if (CPU != "generic") { llvm::ARM::ArchKind ArchKind = llvm::ARM::parseCPUArch(CPU); uint64_t Extension = llvm::ARM::getDefaultExtensions(CPU, ArchKind); llvm::ARM::getExtensionFeatures(Extension, Features); } } // Check if -march is valid by checking if it can be canonicalised and parsed. // getARMArch is used here instead of just checking the -march value in order // to handle -march=native correctly. static void checkARMArchName(const Driver &D, const Arg *A, const ArgList &Args, llvm::StringRef ArchName, llvm::StringRef CPUName, std::vector &Features, const llvm::Triple &Triple, llvm::ARM::FPUKind &ArgFPUKind) { std::pair Split = ArchName.split("+"); std::string MArch = arm::getARMArch(ArchName, Triple); llvm::ARM::ArchKind ArchKind = llvm::ARM::parseArch(MArch); if (ArchKind == llvm::ARM::ArchKind::INVALID || (Split.second.size() && !DecodeARMFeatures(D, Split.second, CPUName, ArchKind, Features, ArgFPUKind))) D.Diag(clang::diag::err_drv_unsupported_option_argument) << A->getSpelling() << A->getValue(); } // Check -mcpu=. Needs ArchName to handle -mcpu=generic. static void checkARMCPUName(const Driver &D, const Arg *A, const ArgList &Args, llvm::StringRef CPUName, llvm::StringRef ArchName, std::vector &Features, const llvm::Triple &Triple, llvm::ARM::FPUKind &ArgFPUKind) { std::pair Split = CPUName.split("+"); std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple); llvm::ARM::ArchKind ArchKind = arm::getLLVMArchKindForARM(CPU, ArchName, Triple); if (ArchKind == llvm::ARM::ArchKind::INVALID || (Split.second.size() && !DecodeARMFeatures(D, Split.second, CPU, ArchKind, Features, ArgFPUKind))) D.Diag(clang::diag::err_drv_unsupported_option_argument) << A->getSpelling() << A->getValue(); } // If -mfloat-abi=hard or -mhard-float are specified explicitly then check that // floating point registers are available on the target CPU. static void checkARMFloatABI(const Driver &D, const ArgList &Args, bool HasFPRegs) { if (HasFPRegs) return; const Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, options::OPT_mfloat_abi_EQ); if (A && (A->getOption().matches(options::OPT_mhard_float) || (A->getOption().matches(options::OPT_mfloat_abi_EQ) && A->getValue() == StringRef("hard")))) D.Diag(clang::diag::warn_drv_no_floating_point_registers) << A->getAsString(Args); } bool arm::useAAPCSForMachO(const llvm::Triple &T) { // The backend is hardwired to assume AAPCS for M-class processors, ensure // the frontend matches that. return T.getEnvironment() == llvm::Triple::EABI || T.getEnvironment() == llvm::Triple::EABIHF || T.getOS() == llvm::Triple::UnknownOS || isARMMProfile(T); } // We follow GCC and support when the backend has support for the MRC/MCR // instructions that are used to set the hard thread pointer ("CP15 C13 // Thread id"). 
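// [Editorial illustration, not part of the upstream sources.] getReadTPMode()
// below maps -mtp= values via its StringSwitch: "cp15" and "tpidruro" select
// TPIDRURO, "tpidrurw" selects TPIDRURW, "tpidrprw" selects TPIDRPRW, and
// "soft" selects the software model. A hardware mode is additionally rejected
// with err_target_unsupported_tp_hard when isHardTPSupported() returns false
// and the query is not on behalf of the assembler (ForAS).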
bool arm::isHardTPSupported(const llvm::Triple &Triple) { int Ver = getARMSubArchVersionNumber(Triple); llvm::ARM::ArchKind AK = llvm::ARM::parseArch(Triple.getArchName()); return Triple.isARM() || AK == llvm::ARM::ArchKind::ARMV6T2 || (Ver >= 7 && AK != llvm::ARM::ArchKind::ARMV8MBaseline); } // Select mode for reading thread pointer (-mtp=soft/cp15). arm::ReadTPMode arm::getReadTPMode(const Driver &D, const ArgList &Args, const llvm::Triple &Triple, bool ForAS) { if (Arg *A = Args.getLastArg(options::OPT_mtp_mode_EQ)) { arm::ReadTPMode ThreadPointer = llvm::StringSwitch(A->getValue()) .Case("cp15", ReadTPMode::TPIDRURO) .Case("tpidrurw", ReadTPMode::TPIDRURW) .Case("tpidruro", ReadTPMode::TPIDRURO) .Case("tpidrprw", ReadTPMode::TPIDRPRW) .Case("soft", ReadTPMode::Soft) .Default(ReadTPMode::Invalid); if ((ThreadPointer == ReadTPMode::TPIDRURW || ThreadPointer == ReadTPMode::TPIDRURO || ThreadPointer == ReadTPMode::TPIDRPRW) && !isHardTPSupported(Triple) && !ForAS) { D.Diag(diag::err_target_unsupported_tp_hard) << Triple.getArchName(); return ReadTPMode::Invalid; } if (ThreadPointer != ReadTPMode::Invalid) return ThreadPointer; if (StringRef(A->getValue()).empty()) D.Diag(diag::err_drv_missing_arg_mtp) << A->getAsString(Args); else D.Diag(diag::err_drv_invalid_mtp) << A->getAsString(Args); return ReadTPMode::Invalid; } return ReadTPMode::Soft; } void arm::setArchNameInTriple(const Driver &D, const ArgList &Args, types::ID InputType, llvm::Triple &Triple) { StringRef MCPU, MArch; if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) MCPU = A->getValue(); if (const Arg *A = Args.getLastArg(options::OPT_march_EQ)) MArch = A->getValue(); std::string CPU = Triple.isOSBinFormatMachO() ? tools::arm::getARMCPUForMArch(MArch, Triple).str() : tools::arm::getARMTargetCPU(MCPU, MArch, Triple); StringRef Suffix = tools::arm::getLLVMArchSuffixForARM(CPU, MArch, Triple); bool IsBigEndian = Triple.getArch() == llvm::Triple::armeb || Triple.getArch() == llvm::Triple::thumbeb; // Handle pseudo-target flags '-mlittle-endian'/'-EL' and // '-mbig-endian'/'-EB'. if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian, options::OPT_mbig_endian)) { IsBigEndian = !A->getOption().matches(options::OPT_mlittle_endian); } std::string ArchName = IsBigEndian ? "armeb" : "arm"; // FIXME: Thumb should just be another -target-feaure, not in the triple. bool IsMProfile = llvm::ARM::parseArchProfile(Suffix) == llvm::ARM::ProfileKind::M; bool ThumbDefault = IsMProfile || // Thumb2 is the default for V7 on Darwin. (llvm::ARM::parseArchVersion(Suffix) == 7 && Triple.isOSBinFormatMachO()) || // FIXME: this is invalid for WindowsCE Triple.isOSWindows(); // Check if ARM ISA was explicitly selected (using -mno-thumb or -marm) for // M-Class CPUs/architecture variants, which is not supported. bool ARMModeRequested = !Args.hasFlag(options::OPT_mthumb, options::OPT_mno_thumb, ThumbDefault); if (IsMProfile && ARMModeRequested) { if (MCPU.size()) D.Diag(diag::err_cpu_unsupported_isa) << CPU << "ARM"; else D.Diag(diag::err_arch_unsupported_isa) << tools::arm::getARMArch(MArch, Triple) << "ARM"; } // Check to see if an explicit choice to use thumb has been made via // -mthumb. For assembler files we must check for -mthumb in the options // passed to the assembler via -Wa or -Xassembler. 
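// [Editorial illustration, not part of the upstream sources.] For C/C++
// inputs the decision below comes from -mthumb/-mno-thumb against the
// computed default; for assembly inputs it comes from the assembler's own
// flags, so a hypothetical "-Wa,-mthumb" on a .s file switches ArchName from
// the "arm"/"armeb" prefix to "thumb"/"thumbeb" before the architecture
// suffix is re-attached to the triple further down.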
bool IsThumb = false; if (InputType != types::TY_PP_Asm) IsThumb = Args.hasFlag(options::OPT_mthumb, options::OPT_mno_thumb, ThumbDefault); else { // Ideally we would check for these flags in // CollectArgsForIntegratedAssembler but we can't change the ArchName at // that point. llvm::StringRef WaMArch, WaMCPU; for (const auto *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { for (StringRef Value : A->getValues()) { // There is no assembler equivalent of -mno-thumb, -marm, or -mno-arm. if (Value == "-mthumb") IsThumb = true; else if (Value.starts_with("-march=")) WaMArch = Value.substr(7); else if (Value.starts_with("-mcpu=")) WaMCPU = Value.substr(6); } } if (WaMCPU.size() || WaMArch.size()) { // The way this works means that we prefer -Wa,-mcpu's architecture // over -Wa,-march. Which matches the compiler behaviour. Suffix = tools::arm::getLLVMArchSuffixForARM(WaMCPU, WaMArch, Triple); } } // Assembly files should start in ARM mode, unless arch is M-profile, or // -mthumb has been passed explicitly to the assembler. Windows is always // thumb. if (IsThumb || IsMProfile || Triple.isOSWindows()) { if (IsBigEndian) ArchName = "thumbeb"; else ArchName = "thumb"; } Triple.setArchName(ArchName + Suffix.str()); } void arm::setFloatABIInTriple(const Driver &D, const ArgList &Args, llvm::Triple &Triple) { if (Triple.isOSLiteOS()) { Triple.setEnvironment(llvm::Triple::OpenHOS); return; } bool isHardFloat = (arm::getARMFloatABI(D, Triple, Args) == arm::FloatABI::Hard); switch (Triple.getEnvironment()) { case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: Triple.setEnvironment(isHardFloat ? llvm::Triple::GNUEABIHF : llvm::Triple::GNUEABI); break; + case llvm::Triple::GNUEABIT64: + case llvm::Triple::GNUEABIHFT64: + Triple.setEnvironment(isHardFloat ? llvm::Triple::GNUEABIHFT64 + : llvm::Triple::GNUEABIT64); + break; case llvm::Triple::EABI: case llvm::Triple::EABIHF: Triple.setEnvironment(isHardFloat ? llvm::Triple::EABIHF : llvm::Triple::EABI); break; case llvm::Triple::MuslEABI: case llvm::Triple::MuslEABIHF: Triple.setEnvironment(isHardFloat ? llvm::Triple::MuslEABIHF : llvm::Triple::MuslEABI); break; case llvm::Triple::OpenHOS: break; default: { arm::FloatABI DefaultABI = arm::getDefaultFloatABI(Triple); if (DefaultABI != arm::FloatABI::Invalid && isHardFloat != (DefaultABI == arm::FloatABI::Hard)) { Arg *ABIArg = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, options::OPT_mfloat_abi_EQ); assert(ABIArg && "Non-default float abi expected to be from arg"); D.Diag(diag::err_drv_unsupported_opt_for_target) << ABIArg->getAsString(Args) << Triple.getTriple(); } break; } } } arm::FloatABI arm::getARMFloatABI(const ToolChain &TC, const ArgList &Args) { return arm::getARMFloatABI(TC.getDriver(), TC.getEffectiveTriple(), Args); } arm::FloatABI arm::getDefaultFloatABI(const llvm::Triple &Triple) { auto SubArch = getARMSubArchVersionNumber(Triple); switch (Triple.getOS()) { case llvm::Triple::Darwin: case llvm::Triple::MacOSX: case llvm::Triple::IOS: case llvm::Triple::TvOS: case llvm::Triple::DriverKit: case llvm::Triple::XROS: // Darwin defaults to "softfp" for v6 and v7. if (Triple.isWatchABI()) return FloatABI::Hard; else return (SubArch == 6 || SubArch == 7) ? FloatABI::SoftFP : FloatABI::Soft; case llvm::Triple::WatchOS: return FloatABI::Hard; // FIXME: this is invalid for WindowsCE case llvm::Triple::Win32: // It is incorrect to select hard float ABI on MachO platforms if the ABI is // "apcs-gnu". 
if (Triple.isOSBinFormatMachO() && !useAAPCSForMachO(Triple)) return FloatABI::Soft; return FloatABI::Hard; case llvm::Triple::NetBSD: switch (Triple.getEnvironment()) { case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: return FloatABI::Hard; default: return FloatABI::Soft; } break; case llvm::Triple::FreeBSD: switch (Triple.getEnvironment()) { case llvm::Triple::GNUEABIHF: return FloatABI::Hard; default: // FreeBSD defaults to soft float return FloatABI::Soft; } break; case llvm::Triple::Haiku: case llvm::Triple::OpenBSD: return FloatABI::SoftFP; default: if (Triple.isOHOSFamily()) return FloatABI::Soft; switch (Triple.getEnvironment()) { case llvm::Triple::GNUEABIHF: + case llvm::Triple::GNUEABIHFT64: case llvm::Triple::MuslEABIHF: case llvm::Triple::EABIHF: return FloatABI::Hard; case llvm::Triple::GNUEABI: + case llvm::Triple::GNUEABIT64: case llvm::Triple::MuslEABI: case llvm::Triple::EABI: // EABI is always AAPCS, and if it was not marked 'hard', it's softfp return FloatABI::SoftFP; case llvm::Triple::Android: return (SubArch >= 7) ? FloatABI::SoftFP : FloatABI::Soft; default: return FloatABI::Invalid; } } return FloatABI::Invalid; } // Select the float ABI as determined by -msoft-float, -mhard-float, and // -mfloat-abi=. arm::FloatABI arm::getARMFloatABI(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) { arm::FloatABI ABI = FloatABI::Invalid; if (Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, options::OPT_mfloat_abi_EQ)) { if (A->getOption().matches(options::OPT_msoft_float)) { ABI = FloatABI::Soft; } else if (A->getOption().matches(options::OPT_mhard_float)) { ABI = FloatABI::Hard; } else { ABI = llvm::StringSwitch(A->getValue()) .Case("soft", FloatABI::Soft) .Case("softfp", FloatABI::SoftFP) .Case("hard", FloatABI::Hard) .Default(FloatABI::Invalid); if (ABI == FloatABI::Invalid && !StringRef(A->getValue()).empty()) { D.Diag(diag::err_drv_invalid_mfloat_abi) << A->getAsString(Args); ABI = FloatABI::Soft; } } } // If unspecified, choose the default based on the platform. if (ABI == FloatABI::Invalid) ABI = arm::getDefaultFloatABI(Triple); if (ABI == FloatABI::Invalid) { // Assume "soft", but warn the user we are guessing. if (Triple.isOSBinFormatMachO() && Triple.getSubArch() == llvm::Triple::ARMSubArch_v7em) ABI = FloatABI::Hard; else ABI = FloatABI::Soft; if (Triple.getOS() != llvm::Triple::UnknownOS || !Triple.isOSBinFormatMachO()) D.Diag(diag::warn_drv_assuming_mfloat_abi_is) << "soft"; } assert(ABI != FloatABI::Invalid && "must select an ABI"); return ABI; } static bool hasIntegerMVE(const std::vector &F) { auto MVE = llvm::find(llvm::reverse(F), "+mve"); auto NoMVE = llvm::find(llvm::reverse(F), "-mve"); return MVE != F.rend() && (NoMVE == F.rend() || std::distance(MVE, NoMVE) > 0); } llvm::ARM::FPUKind arm::getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, std::vector &Features, bool ForAS, bool ForMultilib) { bool KernelOrKext = Args.hasArg(options::OPT_mkernel, options::OPT_fapple_kext); arm::FloatABI ABI = arm::getARMFloatABI(D, Triple, Args); std::optional> WaCPU, WaFPU, WaHDiv, WaArch; // This vector will accumulate features from the architecture // extension suffixes on -mcpu and -march (e.g. the 'bar' in // -mcpu=foo+bar). We want to apply those after the features derived // from the FPU, in case -mfpu generates a negative feature which // the +bar is supposed to override. 
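// [Editorial illustration, not part of the upstream sources.] The ordering
// matters because later entries in the feature vector take precedence: if,
// say, the selected -mfpu implies "-crypto" while the user asked for
// "+crypto" via an -march/-mcpu suffix, appending ExtensionFeatures after the
// FPU-derived features ensures the explicit "+crypto" takes effect.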
std::vector ExtensionFeatures; if (!ForAS) { // FIXME: Note, this is a hack, the LLVM backend doesn't actually use these // yet (it uses the -mfloat-abi and -msoft-float options), and it is // stripped out by the ARM target. We should probably pass this a new // -target-option, which is handled by the -cc1/-cc1as invocation. // // FIXME2: For consistency, it would be ideal if we set up the target // machine state the same when using the frontend or the assembler. We don't // currently do that for the assembler, we pass the options directly to the // backend and never even instantiate the frontend TargetInfo. If we did, // and used its handleTargetFeatures hook, then we could ensure the // assembler and the frontend behave the same. // Use software floating point operations? if (ABI == arm::FloatABI::Soft) Features.push_back("+soft-float"); // Use software floating point argument passing? if (ABI != arm::FloatABI::Hard) Features.push_back("+soft-float-abi"); } else { // Here, we make sure that -Wa,-mfpu/cpu/arch/hwdiv will be passed down // to the assembler correctly. for (const Arg *A : Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) { // We use getValues here because you can have many options per -Wa // We will keep the last one we find for each of these for (StringRef Value : A->getValues()) { if (Value.starts_with("-mfpu=")) { WaFPU = std::make_pair(A, Value.substr(6)); } else if (Value.starts_with("-mcpu=")) { WaCPU = std::make_pair(A, Value.substr(6)); } else if (Value.starts_with("-mhwdiv=")) { WaHDiv = std::make_pair(A, Value.substr(8)); } else if (Value.starts_with("-march=")) { WaArch = std::make_pair(A, Value.substr(7)); } } } // The integrated assembler doesn't implement e_flags setting behavior for // -meabi=gnu (gcc -mabi={apcs-gnu,atpcs} passes -meabi=gnu to gas). For // compatibility we accept but warn. if (Arg *A = Args.getLastArgNoClaim(options::OPT_mabi_EQ)) A->ignoreTargetSpecific(); } if (getReadTPMode(D, Args, Triple, ForAS) == ReadTPMode::TPIDRURW) Features.push_back("+read-tp-tpidrurw"); if (getReadTPMode(D, Args, Triple, ForAS) == ReadTPMode::TPIDRURO) Features.push_back("+read-tp-tpidruro"); if (getReadTPMode(D, Args, Triple, ForAS) == ReadTPMode::TPIDRPRW) Features.push_back("+read-tp-tpidrprw"); const Arg *ArchArg = Args.getLastArg(options::OPT_march_EQ); const Arg *CPUArg = Args.getLastArg(options::OPT_mcpu_EQ); StringRef ArchName; StringRef CPUName; llvm::ARM::FPUKind ArchArgFPUKind = llvm::ARM::FK_INVALID; llvm::ARM::FPUKind CPUArgFPUKind = llvm::ARM::FK_INVALID; // Check -mcpu. ClangAs gives preference to -Wa,-mcpu=. if (WaCPU) { if (CPUArg) D.Diag(clang::diag::warn_drv_unused_argument) << CPUArg->getAsString(Args); CPUName = WaCPU->second; CPUArg = WaCPU->first; } else if (CPUArg) CPUName = CPUArg->getValue(); // Check -march. ClangAs gives preference to -Wa,-march=. if (WaArch) { if (ArchArg) D.Diag(clang::diag::warn_drv_unused_argument) << ArchArg->getAsString(Args); ArchName = WaArch->second; // This will set any features after the base architecture. checkARMArchName(D, WaArch->first, Args, ArchName, CPUName, ExtensionFeatures, Triple, ArchArgFPUKind); // The base architecture was handled in ToolChain::ComputeLLVMTriple because // triple is read only by this point. 
} else if (ArchArg) { ArchName = ArchArg->getValue(); checkARMArchName(D, ArchArg, Args, ArchName, CPUName, ExtensionFeatures, Triple, ArchArgFPUKind); } // Add CPU features for generic CPUs if (CPUName == "native") { for (auto &F : llvm::sys::getHostCPUFeatures()) Features.push_back( Args.MakeArgString((F.second ? "+" : "-") + F.first())); } else if (!CPUName.empty()) { // This sets the default features for the specified CPU. We certainly don't // want to override the features that have been explicitly specified on the // command line. Therefore, process them directly instead of appending them // at the end later. DecodeARMFeaturesFromCPU(D, CPUName, Features); } if (CPUArg) checkARMCPUName(D, CPUArg, Args, CPUName, ArchName, ExtensionFeatures, Triple, CPUArgFPUKind); // TODO Handle -mtune=. Suppress -Wunused-command-line-argument as a // longstanding behavior. (void)Args.getLastArg(options::OPT_mtune_EQ); // Honor -mfpu=. ClangAs gives preference to -Wa,-mfpu=. llvm::ARM::FPUKind FPUKind = llvm::ARM::FK_INVALID; const Arg *FPUArg = Args.getLastArg(options::OPT_mfpu_EQ); if (WaFPU) { if (FPUArg) D.Diag(clang::diag::warn_drv_unused_argument) << FPUArg->getAsString(Args); (void)getARMFPUFeatures(D, WaFPU->first, Args, WaFPU->second, Features); } else if (FPUArg) { FPUKind = getARMFPUFeatures(D, FPUArg, Args, FPUArg->getValue(), Features); } else if (Triple.isAndroid() && getARMSubArchVersionNumber(Triple) >= 7) { const char *AndroidFPU = "neon"; FPUKind = llvm::ARM::parseFPU(AndroidFPU); if (!llvm::ARM::getFPUFeatures(FPUKind, Features)) D.Diag(clang::diag::err_drv_clang_unsupported) << std::string("-mfpu=") + AndroidFPU; } else if (ArchArgFPUKind != llvm::ARM::FK_INVALID || CPUArgFPUKind != llvm::ARM::FK_INVALID) { FPUKind = CPUArgFPUKind != llvm::ARM::FK_INVALID ? CPUArgFPUKind : ArchArgFPUKind; (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } else { if (!ForAS) { std::string CPU = arm::getARMTargetCPU(CPUName, ArchName, Triple); llvm::ARM::ArchKind ArchKind = arm::getLLVMArchKindForARM(CPU, ArchName, Triple); FPUKind = llvm::ARM::getDefaultFPU(CPU, ArchKind); (void)llvm::ARM::getFPUFeatures(FPUKind, Features); } } // Now we've finished accumulating features from arch, cpu and fpu, // we can append the ones for architecture extensions that we // collected separately. Features.insert(std::end(Features), std::begin(ExtensionFeatures), std::end(ExtensionFeatures)); // Honor -mhwdiv=. ClangAs gives preference to -Wa,-mhwdiv=. const Arg *HDivArg = Args.getLastArg(options::OPT_mhwdiv_EQ); if (WaHDiv) { if (HDivArg) D.Diag(clang::diag::warn_drv_unused_argument) << HDivArg->getAsString(Args); getARMHWDivFeatures(D, WaHDiv->first, Args, WaHDiv->second, Features); } else if (HDivArg) getARMHWDivFeatures(D, HDivArg, Args, HDivArg->getValue(), Features); // Handle (arch-dependent) fp16fml/fullfp16 relationship. // Must happen before any features are disabled due to soft-float. // FIXME: this fp16fml option handling will be reimplemented after the // TargetParser rewrite. const auto ItRNoFullFP16 = std::find(Features.rbegin(), Features.rend(), "-fullfp16"); const auto ItRFP16FML = std::find(Features.rbegin(), Features.rend(), "+fp16fml"); if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8_4a) { const auto ItRFullFP16 = std::find(Features.rbegin(), Features.rend(), "+fullfp16"); if (ItRFullFP16 < ItRNoFullFP16 && ItRFullFP16 < ItRFP16FML) { // Only entangled feature that can be to the right of this +fullfp16 is -fp16fml. 
// Only append the +fp16fml if there is no -fp16fml after the +fullfp16. if (std::find(Features.rbegin(), ItRFullFP16, "-fp16fml") == ItRFullFP16) Features.push_back("+fp16fml"); } else goto fp16_fml_fallthrough; } else { fp16_fml_fallthrough: // In both of these cases, putting the 'other' feature on the end of the vector will // result in the same effect as placing it immediately after the current feature. if (ItRNoFullFP16 < ItRFP16FML) Features.push_back("-fp16fml"); else if (ItRNoFullFP16 > ItRFP16FML) Features.push_back("+fullfp16"); } // Setting -msoft-float/-mfloat-abi=soft, -mfpu=none, or adding +nofp to // -march/-mcpu effectively disables the FPU (GCC ignores the -mfpu options in // this case). Note that the ABI can also be set implicitly by the target // selected. bool HasFPRegs = true; if (ABI == arm::FloatABI::Soft) { llvm::ARM::getFPUFeatures(llvm::ARM::FK_NONE, Features); // Disable all features relating to hardware FP, not already disabled by the // above call. Features.insert(Features.end(), {"-dotprod", "-fp16fml", "-bf16", "-mve", "-mve.fp"}); HasFPRegs = false; FPUKind = llvm::ARM::FK_NONE; } else if (FPUKind == llvm::ARM::FK_NONE || ArchArgFPUKind == llvm::ARM::FK_NONE || CPUArgFPUKind == llvm::ARM::FK_NONE) { // -mfpu=none, -march=armvX+nofp or -mcpu=X+nofp is *very* similar to // -mfloat-abi=soft, only that it should not disable MVE-I. They disable the // FPU, but not the FPU registers, thus MVE-I, which depends only on the // latter, is still supported. Features.insert(Features.end(), {"-dotprod", "-fp16fml", "-bf16", "-mve.fp"}); HasFPRegs = hasIntegerMVE(Features); FPUKind = llvm::ARM::FK_NONE; } if (!HasFPRegs) Features.emplace_back("-fpregs"); // En/disable crc code generation. if (Arg *A = Args.getLastArg(options::OPT_mcrc, options::OPT_mnocrc)) { if (A->getOption().matches(options::OPT_mcrc)) Features.push_back("+crc"); else Features.push_back("-crc"); } // For Arch >= ARMv8.0 && A or R profile: crypto = sha2 + aes // Rather than replace within the feature vector, determine whether each // algorithm is enabled and append this to the end of the vector. // The algorithms can be controlled by their specific feature or the crypto // feature, so their status can be determined by the last occurance of // either in the vector. This allows one to supercede the other. // e.g. 
+crypto+noaes in -march/-mcpu should enable sha2, but not aes // FIXME: this needs reimplementation after the TargetParser rewrite bool HasSHA2 = false; bool HasAES = false; const auto ItCrypto = llvm::find_if(llvm::reverse(Features), [](const StringRef F) { return F.contains("crypto"); }); const auto ItSHA2 = llvm::find_if(llvm::reverse(Features), [](const StringRef F) { return F.contains("crypto") || F.contains("sha2"); }); const auto ItAES = llvm::find_if(llvm::reverse(Features), [](const StringRef F) { return F.contains("crypto") || F.contains("aes"); }); const bool FoundSHA2 = ItSHA2 != Features.rend(); const bool FoundAES = ItAES != Features.rend(); if (FoundSHA2) HasSHA2 = ItSHA2->take_front() == "+"; if (FoundAES) HasAES = ItAES->take_front() == "+"; if (ItCrypto != Features.rend()) { if (HasSHA2 && HasAES) Features.push_back("+crypto"); else Features.push_back("-crypto"); if (HasSHA2) Features.push_back("+sha2"); else Features.push_back("-sha2"); if (HasAES) Features.push_back("+aes"); else Features.push_back("-aes"); } if (HasSHA2 || HasAES) { StringRef ArchSuffix = arm::getLLVMArchSuffixForARM( arm::getARMTargetCPU(CPUName, ArchName, Triple), ArchName, Triple); llvm::ARM::ProfileKind ArchProfile = llvm::ARM::parseArchProfile(ArchSuffix); if (!((llvm::ARM::parseArchVersion(ArchSuffix) >= 8) && (ArchProfile == llvm::ARM::ProfileKind::A || ArchProfile == llvm::ARM::ProfileKind::R))) { if (HasSHA2) D.Diag(clang::diag::warn_target_unsupported_extension) << "sha2" << llvm::ARM::getArchName(llvm::ARM::parseArch(ArchSuffix)); if (HasAES) D.Diag(clang::diag::warn_target_unsupported_extension) << "aes" << llvm::ARM::getArchName(llvm::ARM::parseArch(ArchSuffix)); // With -fno-integrated-as -mfpu=crypto-neon-fp-armv8 some assemblers such // as the GNU assembler will permit the use of crypto instructions as the // fpu will override the architecture. We keep the crypto feature in this // case to preserve compatibility. In all other cases we remove the crypto // feature. if (!Args.hasArg(options::OPT_fno_integrated_as)) { Features.push_back("-sha2"); Features.push_back("-aes"); } } } // Propagate frame-chain model selection if (Arg *A = Args.getLastArg(options::OPT_mframe_chain)) { StringRef FrameChainOption = A->getValue(); if (FrameChainOption.starts_with("aapcs")) Features.push_back("+aapcs-frame-chain"); } // CMSE: Check for target 8M (for -mcmse to be applicable) is performed later. if (Args.getLastArg(options::OPT_mcmse)) Features.push_back("+8msecext"); if (Arg *A = Args.getLastArg(options::OPT_mfix_cmse_cve_2021_35465, options::OPT_mno_fix_cmse_cve_2021_35465)) { if (!Args.getLastArg(options::OPT_mcmse)) D.Diag(diag::err_opt_not_valid_without_opt) << A->getOption().getName() << "-mcmse"; if (A->getOption().matches(options::OPT_mfix_cmse_cve_2021_35465)) Features.push_back("+fix-cmse-cve-2021-35465"); else Features.push_back("-fix-cmse-cve-2021-35465"); } // This also handles the -m(no-)fix-cortex-a72-1655431 arguments via aliases. if (Arg *A = Args.getLastArg(options::OPT_mfix_cortex_a57_aes_1742098, options::OPT_mno_fix_cortex_a57_aes_1742098)) { if (A->getOption().matches(options::OPT_mfix_cortex_a57_aes_1742098)) { Features.push_back("+fix-cortex-a57-aes-1742098"); } else { Features.push_back("-fix-cortex-a57-aes-1742098"); } } // Look for the last occurrence of -mlong-calls or -mno-long-calls. If // neither options are specified, see if we are compiling for kernel/kext and // decide whether to pass "+long-calls" based on the OS and its version. 
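A note on the mechanism used above: the target-feature vector is order-sensitive and the last "+name"/"-name" entry for a given feature wins, which is why the fp16fml and crypto/sha2/aes fix-ups are appended at the end and the vector is scanned in reverse. A minimal sketch of that convention in plain C++ (hypothetical helper, not Clang's actual API):

#include <algorithm>
#include <optional>
#include <string>
#include <string_view>
#include <vector>

// Report the effective state of one feature, letting the last "+name"/"-name"
// entry in the vector win, the same way the backend applies the list in order.
static std::optional<bool>
effectiveFeatureState(const std::vector<std::string> &Features,
                      std::string_view Name) {
  auto It = std::find_if(Features.rbegin(), Features.rend(),
                         [&](const std::string &F) {
                           return F.size() > 1 &&
                                  (F[0] == '+' || F[0] == '-') &&
                                  std::string_view(F).substr(1) == Name;
                         });
  if (It == Features.rend())
    return std::nullopt; // feature never mentioned
  return (*It)[0] == '+'; // last occurrence decides
}

// e.g. {"+sha2", "-sha2", "+aes"} yields sha2 -> disabled, aes -> enabled.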
if (Arg *A = Args.getLastArg(options::OPT_mlong_calls, options::OPT_mno_long_calls)) { if (A->getOption().matches(options::OPT_mlong_calls)) Features.push_back("+long-calls"); } else if (KernelOrKext && (!Triple.isiOS() || Triple.isOSVersionLT(6)) && !Triple.isWatchOS() && !Triple.isXROS()) { Features.push_back("+long-calls"); } // Generate execute-only output (no data access to code sections). // This only makes sense for the compiler, not for the assembler. // It's not needed for multilib selection and may hide an unused // argument diagnostic if the code is always run. if (!ForAS && !ForMultilib) { // Supported only on ARMv6T2 and ARMv7 and above. // Cannot be combined with -mno-movt. if (Arg *A = Args.getLastArg(options::OPT_mexecute_only, options::OPT_mno_execute_only)) { if (A->getOption().matches(options::OPT_mexecute_only)) { if (getARMSubArchVersionNumber(Triple) < 7 && llvm::ARM::parseArch(Triple.getArchName()) != llvm::ARM::ArchKind::ARMV6T2 && llvm::ARM::parseArch(Triple.getArchName()) != llvm::ARM::ArchKind::ARMV6M) D.Diag(diag::err_target_unsupported_execute_only) << Triple.getArchName(); else if (llvm::ARM::parseArch(Triple.getArchName()) == llvm::ARM::ArchKind::ARMV6M) { if (Arg *PIArg = Args.getLastArg(options::OPT_fropi, options::OPT_frwpi, options::OPT_fpic, options::OPT_fpie, options::OPT_fPIC, options::OPT_fPIE)) D.Diag(diag::err_opt_not_valid_with_opt_on_target) << A->getAsString(Args) << PIArg->getAsString(Args) << Triple.getArchName(); } else if (Arg *B = Args.getLastArg(options::OPT_mno_movt)) D.Diag(diag::err_opt_not_valid_with_opt) << A->getAsString(Args) << B->getAsString(Args); Features.push_back("+execute-only"); } } } if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access, options::OPT_munaligned_access, options::OPT_mstrict_align, options::OPT_mno_strict_align)) { // Kernel code has more strict alignment requirements. if (KernelOrKext || A->getOption().matches(options::OPT_mno_unaligned_access) || A->getOption().matches(options::OPT_mstrict_align)) { Features.push_back("+strict-align"); } else { // No v6M core supports unaligned memory access (v6M ARM ARM A3.2). if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) D.Diag(diag::err_target_unsupported_unaligned) << "v6m"; // v8M Baseline follows on from v6M, so doesn't support unaligned memory // access either. else if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8m_baseline) D.Diag(diag::err_target_unsupported_unaligned) << "v8m.base"; } } else { // Assume pre-ARMv6 doesn't support unaligned accesses. // // ARMv6 may or may not support unaligned accesses depending on the // SCTLR.U bit, which is architecture-specific. We assume ARMv6 // Darwin and NetBSD targets support unaligned accesses, and others don't. // // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit which // raises an alignment fault on unaligned accesses. Assume ARMv7+ supports // unaligned accesses, except ARMv6-M, and ARMv8-M without the Main // Extension. This aligns with the default behavior of ARM's downstream // versions of GCC and Clang. // // Users can change the default behavior via -m[no-]unaliged-access. 
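The comment above and the code that follows encode a fairly involved default. A distilled restatement as a pure function (hypothetical name, same rule as the code below, shown only for readability):

// True when the driver should add "+strict-align" because the user passed
// neither -munaligned-access nor -mno-unaligned-access.
static bool defaultsToStrictAlign(int ArchVersion, bool IsV6M,
                                  bool IsV8MBaseline, bool IsDarwinOrNetBSD) {
  if (IsDarwinOrNetBSD)
    return ArchVersion < 6 || IsV6M; // ARMv6 assumed OK on these targets
  return ArchVersion < 7 || IsV6M || IsV8MBaseline;
}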
int VersionNum = getARMSubArchVersionNumber(Triple); if (Triple.isOSDarwin() || Triple.isOSNetBSD()) { if (VersionNum < 6 || Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) Features.push_back("+strict-align"); } else if (VersionNum < 7 || Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m || Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8m_baseline) { Features.push_back("+strict-align"); } } // llvm does not support reserving registers in general. There is support // for reserving r9 on ARM though (defined as a platform-specific register // in ARM EABI). if (Args.hasArg(options::OPT_ffixed_r9)) Features.push_back("+reserve-r9"); // The kext linker doesn't know how to deal with movw/movt. if (KernelOrKext || Args.hasArg(options::OPT_mno_movt)) Features.push_back("+no-movt"); if (Args.hasArg(options::OPT_mno_neg_immediates)) Features.push_back("+no-neg-immediates"); // Enable/disable straight line speculation hardening. if (Arg *A = Args.getLastArg(options::OPT_mharden_sls_EQ)) { StringRef Scope = A->getValue(); bool EnableRetBr = false; bool EnableBlr = false; bool DisableComdat = false; if (Scope != "none") { SmallVector Opts; Scope.split(Opts, ","); for (auto Opt : Opts) { Opt = Opt.trim(); if (Opt == "all") { EnableBlr = true; EnableRetBr = true; continue; } if (Opt == "retbr") { EnableRetBr = true; continue; } if (Opt == "blr") { EnableBlr = true; continue; } if (Opt == "comdat") { DisableComdat = false; continue; } if (Opt == "nocomdat") { DisableComdat = true; continue; } D.Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << Scope; break; } } if (EnableRetBr || EnableBlr) if (!(isARMAProfile(Triple) && getARMSubArchVersionNumber(Triple) >= 7)) D.Diag(diag::err_sls_hardening_arm_not_supported) << Scope << A->getAsString(Args); if (EnableRetBr) Features.push_back("+harden-sls-retbr"); if (EnableBlr) Features.push_back("+harden-sls-blr"); if (DisableComdat) { Features.push_back("+harden-sls-nocomdat"); } } if (Args.getLastArg(options::OPT_mno_bti_at_return_twice)) Features.push_back("+no-bti-at-return-twice"); checkARMFloatABI(D, Args, HasFPRegs); return FPUKind; } std::string arm::getARMArch(StringRef Arch, const llvm::Triple &Triple) { std::string MArch; if (!Arch.empty()) MArch = std::string(Arch); else MArch = std::string(Triple.getArchName()); MArch = StringRef(MArch).split("+").first.lower(); // Handle -march=native. if (MArch == "native") { std::string CPU = std::string(llvm::sys::getHostCPUName()); if (CPU != "generic") { // Translate the native cpu into the architecture suffix for that CPU. StringRef Suffix = arm::getLLVMArchSuffixForARM(CPU, MArch, Triple); // If there is no valid architecture suffix for this CPU we don't know how // to handle it, so return no architecture. if (Suffix.empty()) MArch = ""; else MArch = std::string("arm") + Suffix.str(); } } return MArch; } /// Get the (LLVM) name of the minimum ARM CPU for the arch we are targeting. StringRef arm::getARMCPUForMArch(StringRef Arch, const llvm::Triple &Triple) { std::string MArch = getARMArch(Arch, Triple); // getARMCPUForArch defaults to the triple if MArch is empty, but empty MArch // here means an -march=native that we can't handle, so instead return no CPU. if (MArch.empty()) return StringRef(); // We need to return an empty string here on invalid MArch values as the // various places that call this function can't cope with a null result. 
return llvm::ARM::getARMCPUForArch(Triple, MArch); } /// getARMTargetCPU - Get the (LLVM) name of the ARM cpu we are targeting. std::string arm::getARMTargetCPU(StringRef CPU, StringRef Arch, const llvm::Triple &Triple) { // FIXME: Warn on inconsistent use of -mcpu and -march. // If we have -mcpu=, use that. if (!CPU.empty()) { std::string MCPU = StringRef(CPU).split("+").first.lower(); // Handle -mcpu=native. if (MCPU == "native") return std::string(llvm::sys::getHostCPUName()); else return MCPU; } return std::string(getARMCPUForMArch(Arch, Triple)); } /// getLLVMArchSuffixForARM - Get the LLVM ArchKind value to use for a /// particular CPU (or Arch, if CPU is generic). This is needed to /// pass to functions like llvm::ARM::getDefaultFPU which need an /// ArchKind as well as a CPU name. llvm::ARM::ArchKind arm::getLLVMArchKindForARM(StringRef CPU, StringRef Arch, const llvm::Triple &Triple) { llvm::ARM::ArchKind ArchKind; if (CPU == "generic" || CPU.empty()) { std::string ARMArch = tools::arm::getARMArch(Arch, Triple); ArchKind = llvm::ARM::parseArch(ARMArch); if (ArchKind == llvm::ARM::ArchKind::INVALID) // In case of generic Arch, i.e. "arm", // extract arch from default cpu of the Triple ArchKind = llvm::ARM::parseCPUArch(llvm::ARM::getARMCPUForArch(Triple, ARMArch)); } else { // FIXME: horrible hack to get around the fact that Cortex-A7 is only an // armv7k triple if it's actually been specified via "-arch armv7k". ArchKind = (Arch == "armv7k" || Arch == "thumbv7k") ? llvm::ARM::ArchKind::ARMV7K : llvm::ARM::parseCPUArch(CPU); } return ArchKind; } /// getLLVMArchSuffixForARM - Get the LLVM arch name to use for a particular /// CPU (or Arch, if CPU is generic). // FIXME: This is redundant with -mcpu, why does LLVM use this. StringRef arm::getLLVMArchSuffixForARM(StringRef CPU, StringRef Arch, const llvm::Triple &Triple) { llvm::ARM::ArchKind ArchKind = getLLVMArchKindForARM(CPU, Arch, Triple); if (ArchKind == llvm::ARM::ArchKind::INVALID) return ""; return llvm::ARM::getSubArch(ArchKind); } void arm::appendBE8LinkFlag(const ArgList &Args, ArgStringList &CmdArgs, const llvm::Triple &Triple) { if (Args.hasArg(options::OPT_r)) return; // ARMv7 (and later) and ARMv6-M do not support BE-32, so instruct the linker // to generate BE-8 executables. if (arm::getARMSubArchVersionNumber(Triple) >= 7 || arm::isARMMProfile(Triple)) CmdArgs.push_back("--be8"); } diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.cpp index 543f3965dfd4..5e9a655eaf82 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Gnu.cpp @@ -1,3480 +1,3482 @@ //===--- Gnu.cpp - Gnu Tool and ToolChain Implementations -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Gnu.h" #include "Arch/ARM.h" #include "Arch/CSKY.h" #include "Arch/LoongArch.h" #include "Arch/Mips.h" #include "Arch/PPC.h" #include "Arch/RISCV.h" #include "Arch/Sparc.h" #include "Arch/SystemZ.h" #include "CommonArgs.h" #include "Linux.h" #include "clang/Config/config.h" // for GCC_INSTALL_PREFIX #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/MultilibBuilder.h" #include "clang/Driver/Options.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/Twine.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Path.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/TargetParser/RISCVISAInfo.h" #include "llvm/TargetParser/TargetParser.h" #include using namespace clang::driver; using namespace clang::driver::toolchains; using namespace clang; using namespace llvm::opt; using tools::addMultilibFlag; using tools::addPathIfExists; static bool forwardToGCC(const Option &O) { // LinkerInput options have been forwarded. Don't duplicate. if (O.hasFlag(options::LinkerInput)) return false; return O.matches(options::OPT_Link_Group) || O.hasFlag(options::LinkOption); } // Switch CPU names not recognized by GNU assembler to a close CPU that it does // recognize, instead of a lower march from being picked in the absence of a cpu // flag. static void normalizeCPUNamesForAssembler(const ArgList &Args, ArgStringList &CmdArgs) { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { StringRef CPUArg(A->getValue()); if (CPUArg.equals_insensitive("krait")) CmdArgs.push_back("-mcpu=cortex-a15"); else if (CPUArg.equals_insensitive("kryo")) CmdArgs.push_back("-mcpu=cortex-a57"); else Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ); } } void tools::gcc::Common::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; for (const auto &A : Args) { if (forwardToGCC(A->getOption())) { // It is unfortunate that we have to claim here, as this means // we will basically never report anything interesting for // platforms using a generic gcc, even if we are just using gcc // to get to the assembler. A->claim(); A->render(Args, CmdArgs); } } RenderExtraToolArgs(JA, CmdArgs); // If using a driver, force the arch. if (getToolChain().getTriple().isOSDarwin()) { CmdArgs.push_back("-arch"); CmdArgs.push_back( Args.MakeArgString(getToolChain().getDefaultUniversalArchName())); } // Try to force gcc to match the tool chain we want, if we recognize // the arch. // // FIXME: The triple class should directly provide the information we want // here. 
switch (getToolChain().getArch()) { default: break; case llvm::Triple::x86: case llvm::Triple::ppc: case llvm::Triple::ppcle: CmdArgs.push_back("-m32"); break; case llvm::Triple::x86_64: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: CmdArgs.push_back("-m64"); break; case llvm::Triple::sparcel: CmdArgs.push_back("-EL"); break; } assert((Output.isFilename() || Output.isNothing()) && "Invalid output."); if (Output.isFilename()) { CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); } else { CmdArgs.push_back("-fsyntax-only"); } Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); // Only pass -x if gcc will understand it; otherwise hope gcc // understands the suffix correctly. The main use case this would go // wrong in is for linker inputs if they happened to have an odd // suffix; really the only way to get this to happen is a command // like '-x foobar a.c' which will treat a.c like a linker input. // // FIXME: For the linker case specifically, can we safely convert // inputs into '-Wl,' options? for (const auto &II : Inputs) { // Don't try to pass LLVM or AST inputs to a generic gcc. if (types::isLLVMIR(II.getType())) D.Diag(clang::diag::err_drv_no_linker_llvm_support) << getToolChain().getTripleString(); else if (II.getType() == types::TY_AST) D.Diag(diag::err_drv_no_ast_support) << getToolChain().getTripleString(); else if (II.getType() == types::TY_ModuleFile) D.Diag(diag::err_drv_no_module_support) << getToolChain().getTripleString(); if (types::canTypeBeUserSpecified(II.getType())) { CmdArgs.push_back("-x"); CmdArgs.push_back(types::getTypeName(II.getType())); } if (II.isFilename()) CmdArgs.push_back(II.getFilename()); else { const Arg &A = II.getInputArg(); // Reverse translate some rewritten options. if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) { CmdArgs.push_back("-lstdc++"); continue; } // Don't render as input, we need gcc to do the translations. A.render(Args, CmdArgs); } } const std::string &customGCCName = D.getCCCGenericGCCName(); const char *GCCName; if (!customGCCName.empty()) GCCName = customGCCName.c_str(); else if (D.CCCIsCXX()) { GCCName = "g++"; } else GCCName = "gcc"; const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(GCCName)); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, Output)); } void tools::gcc::Preprocessor::RenderExtraToolArgs( const JobAction &JA, ArgStringList &CmdArgs) const { CmdArgs.push_back("-E"); } void tools::gcc::Compiler::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { const Driver &D = getToolChain().getDriver(); switch (JA.getType()) { // If -flto, etc. are present then make sure not to force assembly output. case types::TY_LLVM_IR: case types::TY_LTO_IR: case types::TY_LLVM_BC: case types::TY_LTO_BC: CmdArgs.push_back("-c"); break; // We assume we've got an "integrated" assembler in that gcc will produce an // object file itself. case types::TY_Object: CmdArgs.push_back("-c"); break; case types::TY_PP_Asm: CmdArgs.push_back("-S"); break; case types::TY_Nothing: CmdArgs.push_back("-fsyntax-only"); break; default: D.Diag(diag::err_drv_invalid_gcc_output_type) << getTypeName(JA.getType()); } } void tools::gcc::Linker::RenderExtraToolArgs(const JobAction &JA, ArgStringList &CmdArgs) const { // The types are (hopefully) good enough. 
} static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { switch (T.getArch()) { case llvm::Triple::x86: if (T.isOSIAMCU()) return "elf_iamcu"; return "elf_i386"; case llvm::Triple::aarch64: return "aarch64linux"; case llvm::Triple::aarch64_be: return "aarch64linuxb"; case llvm::Triple::arm: case llvm::Triple::thumb: case llvm::Triple::armeb: case llvm::Triple::thumbeb: return tools::arm::isARMBigEndian(T, Args) ? "armelfb_linux_eabi" : "armelf_linux_eabi"; case llvm::Triple::m68k: return "m68kelf"; case llvm::Triple::ppc: if (T.isOSLinux()) return "elf32ppclinux"; return "elf32ppc"; case llvm::Triple::ppcle: if (T.isOSLinux()) return "elf32lppclinux"; return "elf32lppc"; case llvm::Triple::ppc64: return "elf64ppc"; case llvm::Triple::ppc64le: return "elf64lppc"; case llvm::Triple::riscv32: return "elf32lriscv"; case llvm::Triple::riscv64: return "elf64lriscv"; case llvm::Triple::sparc: case llvm::Triple::sparcel: return "elf32_sparc"; case llvm::Triple::sparcv9: return "elf64_sparc"; case llvm::Triple::loongarch32: return "elf32loongarch"; case llvm::Triple::loongarch64: return "elf64loongarch"; case llvm::Triple::mips: return "elf32btsmip"; case llvm::Triple::mipsel: return "elf32ltsmip"; case llvm::Triple::mips64: if (tools::mips::hasMipsAbiArg(Args, "n32") || T.getEnvironment() == llvm::Triple::GNUABIN32) return "elf32btsmipn32"; return "elf64btsmip"; case llvm::Triple::mips64el: if (tools::mips::hasMipsAbiArg(Args, "n32") || T.getEnvironment() == llvm::Triple::GNUABIN32) return "elf32ltsmipn32"; return "elf64ltsmip"; case llvm::Triple::systemz: return "elf64_s390"; case llvm::Triple::x86_64: if (T.isX32()) return "elf32_x86_64"; return "elf_x86_64"; case llvm::Triple::ve: return "elf64ve"; case llvm::Triple::csky: return "cskyelf_linux"; default: return nullptr; } } static bool getStaticPIE(const ArgList &Args, const ToolChain &TC) { bool HasStaticPIE = Args.hasArg(options::OPT_static_pie); if (HasStaticPIE && Args.hasArg(options::OPT_no_pie)) { const Driver &D = TC.getDriver(); const llvm::opt::OptTable &Opts = D.getOpts(); StringRef StaticPIEName = Opts.getOptionName(options::OPT_static_pie); StringRef NoPIEName = Opts.getOptionName(options::OPT_nopie); D.Diag(diag::err_drv_cannot_mix_options) << StaticPIEName << NoPIEName; } return HasStaticPIE; } static bool getStatic(const ArgList &Args) { return Args.hasArg(options::OPT_static) && !Args.hasArg(options::OPT_static_pie); } void tools::gnutools::StaticLibTool::ConstructJob( Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const Driver &D = getToolChain().getDriver(); // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_w); // Silence warnings when linking C code with a C++ '-stdlib' argument. Args.ClaimAllArgs(options::OPT_stdlib_EQ); // ar tool command "llvm-ar ". ArgStringList CmdArgs; // Create and insert file members with a deterministic index. CmdArgs.push_back("rcsD"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) { if (II.isFilename()) { CmdArgs.push_back(II.getFilename()); } } // Delete old output archive file if it already exists before generating a new // archive file. 
auto OutputFileName = Output.getFilename(); if (Output.isFilename() && llvm::sys::fs::exists(OutputFileName)) { if (std::error_code EC = llvm::sys::fs::remove(OutputFileName)) { D.Diag(diag::err_drv_unable_to_remove_file) << EC.message(); return; } } const char *Exec = Args.MakeArgString(getToolChain().GetStaticLibToolPath()); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, Output)); } void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { // FIXME: The Linker class constructor takes a ToolChain and not a // Generic_ELF, so the static_cast might return a reference to a invalid // instance (see PR45061). Ideally, the Linker constructor needs to take a // Generic_ELF instead. const auto &ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); const llvm::Triple::ArchType Arch = ToolChain.getArch(); const bool isOHOSFamily = ToolChain.getTriple().isOHOSFamily(); const bool isAndroid = ToolChain.getTriple().isAndroid(); const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU(); const bool IsVE = ToolChain.getTriple().isVE(); const bool IsStaticPIE = getStaticPIE(Args, ToolChain); const bool IsStatic = getStatic(Args); const bool HasCRTBeginEndFiles = ToolChain.getTriple().hasEnvironment() || (ToolChain.getTriple().getVendor() != llvm::Triple::MipsTechnologies); ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); // and "clang -emit-llvm foo.o -o foo" Args.ClaimAllArgs(options::OPT_emit_llvm); // and for "clang -w foo.o -o foo". Other warning options are already // handled somewhere else. Args.ClaimAllArgs(options::OPT_w); if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("-s"); if (Triple.isARM() || Triple.isThumb()) { bool IsBigEndian = arm::isARMBigEndian(Triple, Args); if (IsBigEndian) arm::appendBE8LinkFlag(Args, CmdArgs, Triple); CmdArgs.push_back(IsBigEndian ? "-EB" : "-EL"); } else if (Triple.isAArch64()) { CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL"); } // Most Android ARM64 targets should enable the linker fix for erratum // 843419. Only non-Cortex-A53 devices are allowed to skip this flag. 
if (Arch == llvm::Triple::aarch64 && (isAndroid || isOHOSFamily)) { std::string CPU = getCPUName(D, Args, Triple); if (CPU.empty() || CPU == "generic" || CPU == "cortex-a53") CmdArgs.push_back("--fix-cortex-a53-843419"); } ToolChain.addExtraOpts(CmdArgs); CmdArgs.push_back("--eh-frame-hdr"); if (const char *LDMOption = getLDMOption(ToolChain.getTriple(), Args)) { CmdArgs.push_back("-m"); CmdArgs.push_back(LDMOption); } else { D.Diag(diag::err_target_unknown_triple) << Triple.str(); return; } if (Triple.isRISCV()) { CmdArgs.push_back("-X"); if (Args.hasArg(options::OPT_mno_relax)) CmdArgs.push_back("--no-relax"); } const bool IsShared = Args.hasArg(options::OPT_shared); if (IsShared) CmdArgs.push_back("-shared"); bool IsPIE = false; if (IsStaticPIE) { CmdArgs.push_back("-static"); CmdArgs.push_back("-pie"); CmdArgs.push_back("--no-dynamic-linker"); CmdArgs.push_back("-z"); CmdArgs.push_back("text"); } else if (IsStatic) { CmdArgs.push_back("-static"); } else if (!Args.hasArg(options::OPT_r)) { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); if (!IsShared) { IsPIE = Args.hasFlag(options::OPT_pie, options::OPT_no_pie, ToolChain.isPIEDefault(Args)); if (IsPIE) CmdArgs.push_back("-pie"); CmdArgs.push_back("-dynamic-linker"); CmdArgs.push_back(Args.MakeArgString(Twine(D.DyldPrefix) + ToolChain.getDynamicLinker(Args))); } } CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, options::OPT_r)) { if (!isAndroid && !IsIAMCU) { const char *crt1 = nullptr; if (!Args.hasArg(options::OPT_shared)) { if (Args.hasArg(options::OPT_pg)) crt1 = "gcrt1.o"; else if (IsPIE) crt1 = "Scrt1.o"; else if (IsStaticPIE) crt1 = "rcrt1.o"; else crt1 = "crt1.o"; } if (crt1) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1))); CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o"))); } if (IsVE) { CmdArgs.push_back("-z"); CmdArgs.push_back("max-page-size=0x4000000"); } if (IsIAMCU) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o"))); else if (HasCRTBeginEndFiles) { std::string P; if (ToolChain.GetRuntimeLibType(Args) == ToolChain::RLT_CompilerRT && !isAndroid) { std::string crtbegin = ToolChain.getCompilerRT(Args, "crtbegin", ToolChain::FT_Object); if (ToolChain.getVFS().exists(crtbegin)) P = crtbegin; } if (P.empty()) { const char *crtbegin; if (Args.hasArg(options::OPT_shared)) crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o"; else if (IsStatic) crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o"; else if (IsPIE || IsStaticPIE) crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbeginS.o"; else crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbegin.o"; P = ToolChain.GetFilePath(crtbegin); } CmdArgs.push_back(Args.MakeArgString(P)); } // Add crtfastmath.o if available and fast math is enabled. ToolChain.addFastMathRuntimeIfAvailable(Args, CmdArgs); if (isAndroid && Args.hasFlag(options::OPT_fandroid_pad_segment, options::OPT_fno_android_pad_segment, false)) CmdArgs.push_back( Args.MakeArgString(ToolChain.GetFilePath("crt_pad_segment.o"))); } Args.addAllArgs(CmdArgs, {options::OPT_L, options::OPT_u}); ToolChain.AddFilePathLibArgs(Args, CmdArgs); if (D.isUsingLTO()) { assert(!Inputs.empty() && "Must have at least one input."); // Find the first filename InputInfo object. 
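As an aside before the LTO handling continues, the startup-file selection above for the common (non-Android, non-IAMCU) case can be summarized by a small pure function (hypothetical helper, sketch only):

// Which crt1 flavour gets linked in, mirroring the branch ordering above.
static const char *pickCrt1(bool Shared, bool Profiling, bool PIE,
                            bool StaticPIE) {
  if (Shared)
    return nullptr;   // shared objects get no crt1 at all
  if (Profiling)
    return "gcrt1.o"; // -pg
  if (PIE)
    return "Scrt1.o";
  if (StaticPIE)
    return "rcrt1.o";
  return "crt1.o";
}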
auto Input = llvm::find_if( Inputs, [](const InputInfo &II) -> bool { return II.isFilename(); }); if (Input == Inputs.end()) // For a very rare case, all of the inputs to the linker are // InputArg. If that happens, just use the first InputInfo. Input = Inputs.begin(); addLTOOptions(ToolChain, Args, CmdArgs, Output, *Input, D.getLTOMode() == LTOK_Thin); } if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle)) CmdArgs.push_back("--no-demangle"); bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs); addLinkerCompressDebugSectionsOption(ToolChain, Args, CmdArgs); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); addHIPRuntimeLibArgs(ToolChain, C, Args, CmdArgs); // The profile runtime also needs access to system libraries. getToolChain().addProfileRTLibs(Args, CmdArgs); if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, options::OPT_r)) { if (ToolChain.ShouldLinkCXXStdlib(Args)) { bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) && !Args.hasArg(options::OPT_static); if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bstatic"); ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); if (OnlyLibstdcxxStatic) CmdArgs.push_back("-Bdynamic"); } CmdArgs.push_back("-lm"); } // Silence warnings when linking C code with a C++ '-stdlib' argument. Args.ClaimAllArgs(options::OPT_stdlib_EQ); // Additional linker set-up and flags for Fortran. This is required in order // to generate executables. As Fortran runtime depends on the C runtime, // these dependencies need to be listed before the C runtime below (i.e. // AddRunTimeLibs). if (D.IsFlangMode()) { addFortranRuntimeLibraryPath(ToolChain, Args, CmdArgs); addFortranRuntimeLibs(ToolChain, Args, CmdArgs); CmdArgs.push_back("-lm"); } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_r)) { if (!Args.hasArg(options::OPT_nodefaultlibs)) { if (IsStatic || IsStaticPIE) CmdArgs.push_back("--start-group"); if (NeedsSanitizerDeps) linkSanitizerRuntimeDeps(ToolChain, Args, CmdArgs); if (NeedsXRayDeps) linkXRayRuntimeDeps(ToolChain, Args, CmdArgs); bool WantPthread = Args.hasArg(options::OPT_pthread) || Args.hasArg(options::OPT_pthreads); // Use the static OpenMP runtime with -static-openmp bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) && !Args.hasArg(options::OPT_static); // FIXME: Only pass GompNeedsRT = true for platforms with libgomp that // require librt. Most modern Linux platforms do, but some may not. if (addOpenMPRuntime(C, CmdArgs, ToolChain, Args, StaticOpenMP, JA.isHostOffloading(Action::OFK_OpenMP), /* GompNeedsRT= */ true)) // OpenMP runtimes implies pthreads when using the GNU toolchain. // FIXME: Does this really make sense for all GNU toolchains? WantPthread = true; AddRunTimeLibs(ToolChain, D, CmdArgs, Args); // LLVM support for atomics on 32-bit SPARC V8+ is incomplete, so // forcibly link with libatomic as a workaround. // TODO: Issue #41880 and D118021. if (getToolChain().getTriple().getArch() == llvm::Triple::sparc) { CmdArgs.push_back("--push-state"); CmdArgs.push_back("--as-needed"); CmdArgs.push_back("-latomic"); CmdArgs.push_back("--pop-state"); } // We don't need libpthread neither for bionic (Android) nor for musl, // (used by OHOS as runtime library). 
if (WantPthread && !isAndroid && !isOHOSFamily) CmdArgs.push_back("-lpthread"); if (Args.hasArg(options::OPT_fsplit_stack)) CmdArgs.push_back("--wrap=pthread_create"); if (!Args.hasArg(options::OPT_nolibc)) CmdArgs.push_back("-lc"); // Add IAMCU specific libs, if needed. if (IsIAMCU) CmdArgs.push_back("-lgloss"); if (IsStatic || IsStaticPIE) CmdArgs.push_back("--end-group"); else AddRunTimeLibs(ToolChain, D, CmdArgs, Args); // Add IAMCU specific libs (outside the group), if needed. if (IsIAMCU) { CmdArgs.push_back("--as-needed"); CmdArgs.push_back("-lsoftfp"); CmdArgs.push_back("--no-as-needed"); } } if (!Args.hasArg(options::OPT_nostartfiles) && !IsIAMCU) { if (HasCRTBeginEndFiles) { std::string P; if (ToolChain.GetRuntimeLibType(Args) == ToolChain::RLT_CompilerRT && !isAndroid) { std::string crtend = ToolChain.getCompilerRT(Args, "crtend", ToolChain::FT_Object); if (ToolChain.getVFS().exists(crtend)) P = crtend; } if (P.empty()) { const char *crtend; if (Args.hasArg(options::OPT_shared)) crtend = isAndroid ? "crtend_so.o" : "crtendS.o"; else if (IsPIE || IsStaticPIE) crtend = isAndroid ? "crtend_android.o" : "crtendS.o"; else crtend = isAndroid ? "crtend_android.o" : "crtend.o"; P = ToolChain.GetFilePath(crtend); } CmdArgs.push_back(Args.MakeArgString(P)); } if (!isAndroid) CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o"))); } } Args.AddAllArgs(CmdArgs, options::OPT_T); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, Output)); } void tools::gnutools::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { const auto &D = getToolChain().getDriver(); claimNoWarnArgs(Args); ArgStringList CmdArgs; llvm::Reloc::Model RelocationModel; unsigned PICLevel; bool IsPIE; const char *DefaultAssembler = "as"; // Enforce GNU as on Solaris; the native assembler's input syntax isn't fully // compatible. if (getToolChain().getTriple().isOSSolaris()) DefaultAssembler = "gas"; std::tie(RelocationModel, PICLevel, IsPIE) = ParsePICArgs(getToolChain(), Args); if (const Arg *A = Args.getLastArg(options::OPT_gz, options::OPT_gz_EQ)) { if (A->getOption().getID() == options::OPT_gz) { CmdArgs.push_back("--compress-debug-sections"); } else { StringRef Value = A->getValue(); if (Value == "none" || Value == "zlib" || Value == "zstd") { CmdArgs.push_back( Args.MakeArgString("--compress-debug-sections=" + Twine(Value))); } else { D.Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << Value; } } } switch (getToolChain().getArch()) { default: break; // Add --32/--64 to make sure we get the format we want. 
// This is incomplete case llvm::Triple::x86: CmdArgs.push_back("--32"); break; case llvm::Triple::x86_64: if (getToolChain().getTriple().isX32()) CmdArgs.push_back("--x32"); else CmdArgs.push_back("--64"); break; case llvm::Triple::ppc: { CmdArgs.push_back("-a32"); CmdArgs.push_back("-mppc"); CmdArgs.push_back("-mbig-endian"); CmdArgs.push_back(ppc::getPPCAsmModeForCPU( getCPUName(D, Args, getToolChain().getTriple()))); break; } case llvm::Triple::ppcle: { CmdArgs.push_back("-a32"); CmdArgs.push_back("-mppc"); CmdArgs.push_back("-mlittle-endian"); CmdArgs.push_back(ppc::getPPCAsmModeForCPU( getCPUName(D, Args, getToolChain().getTriple()))); break; } case llvm::Triple::ppc64: { CmdArgs.push_back("-a64"); CmdArgs.push_back("-mppc64"); CmdArgs.push_back("-mbig-endian"); CmdArgs.push_back(ppc::getPPCAsmModeForCPU( getCPUName(D, Args, getToolChain().getTriple()))); break; } case llvm::Triple::ppc64le: { CmdArgs.push_back("-a64"); CmdArgs.push_back("-mppc64"); CmdArgs.push_back("-mlittle-endian"); CmdArgs.push_back(ppc::getPPCAsmModeForCPU( getCPUName(D, Args, getToolChain().getTriple()))); break; } case llvm::Triple::riscv32: case llvm::Triple::riscv64: { StringRef ABIName = riscv::getRISCVABI(Args, getToolChain().getTriple()); CmdArgs.push_back("-mabi"); CmdArgs.push_back(ABIName.data()); std::string MArchName = riscv::getRISCVArch(Args, getToolChain().getTriple()); CmdArgs.push_back("-march"); CmdArgs.push_back(Args.MakeArgString(MArchName)); if (!Args.hasFlag(options::OPT_mrelax, options::OPT_mno_relax, true)) Args.addOptOutFlag(CmdArgs, options::OPT_mrelax, options::OPT_mno_relax); break; } case llvm::Triple::sparc: case llvm::Triple::sparcel: { CmdArgs.push_back("-32"); std::string CPU = getCPUName(D, Args, getToolChain().getTriple()); CmdArgs.push_back( sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::sparcv9: { CmdArgs.push_back("-64"); std::string CPU = getCPUName(D, Args, getToolChain().getTriple()); CmdArgs.push_back( sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple())); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::thumb: case llvm::Triple::thumbeb: { const llvm::Triple &Triple2 = getToolChain().getTriple(); CmdArgs.push_back(arm::isARMBigEndian(Triple2, Args) ? "-EB" : "-EL"); switch (Triple2.getSubArch()) { case llvm::Triple::ARMSubArch_v7: CmdArgs.push_back("-mfpu=neon"); break; case llvm::Triple::ARMSubArch_v8: CmdArgs.push_back("-mfpu=crypto-neon-fp-armv8"); break; default: break; } switch (arm::getARMFloatABI(getToolChain(), Args)) { case arm::FloatABI::Invalid: llvm_unreachable("must have an ABI!"); case arm::FloatABI::Soft: CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=soft")); break; case arm::FloatABI::SoftFP: CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=softfp")); break; case arm::FloatABI::Hard: CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=hard")); break; } Args.AddLastArg(CmdArgs, options::OPT_march_EQ); normalizeCPUNamesForAssembler(Args, CmdArgs); Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ); // The integrated assembler doesn't implement e_flags setting behavior for // -meabi=gnu (gcc -mabi={apcs-gnu,atpcs} passes -meabi=gnu to gas). For // compatibility we accept but warn. 
if (Arg *A = Args.getLastArgNoClaim(options::OPT_mabi_EQ)) A->ignoreTargetSpecific(); break; } case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: { CmdArgs.push_back( getToolChain().getArch() == llvm::Triple::aarch64_be ? "-EB" : "-EL"); Args.AddLastArg(CmdArgs, options::OPT_march_EQ); normalizeCPUNamesForAssembler(Args, CmdArgs); break; } // TODO: handle loongarch32. case llvm::Triple::loongarch64: { StringRef ABIName = loongarch::getLoongArchABI(D, Args, getToolChain().getTriple()); CmdArgs.push_back(Args.MakeArgString("-mabi=" + ABIName)); break; } case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: { StringRef CPUName; StringRef ABIName; mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName); ABIName = mips::getGnuCompatibleMipsABIName(ABIName); CmdArgs.push_back("-march"); CmdArgs.push_back(CPUName.data()); CmdArgs.push_back("-mabi"); CmdArgs.push_back(ABIName.data()); // -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE, // or -mshared (not implemented) is in effect. if (RelocationModel == llvm::Reloc::Static) CmdArgs.push_back("-mno-shared"); // LLVM doesn't support -mplt yet and acts as if it is always given. // However, -mplt has no effect with the N64 ABI. if (ABIName != "64" && !Args.hasArg(options::OPT_mno_abicalls)) CmdArgs.push_back("-call_nonpic"); if (getToolChain().getTriple().isLittleEndian()) CmdArgs.push_back("-EL"); else CmdArgs.push_back("-EB"); if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) { if (StringRef(A->getValue()) == "2008") CmdArgs.push_back(Args.MakeArgString("-mnan=2008")); } // Add the last -mfp32/-mfpxx/-mfp64 or -mfpxx if it is enabled by default. if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx, options::OPT_mfp64)) { A->claim(); A->render(Args, CmdArgs); } else if (mips::shouldUseFPXX( Args, getToolChain().getTriple(), CPUName, ABIName, mips::getMipsFloatABI(getToolChain().getDriver(), Args, getToolChain().getTriple()))) CmdArgs.push_back("-mfpxx"); // Pass on -mmips16 or -mno-mips16. However, the assembler equivalent of // -mno-mips16 is actually -no-mips16. if (Arg *A = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16)) { if (A->getOption().matches(options::OPT_mips16)) { A->claim(); A->render(Args, CmdArgs); } else { A->claim(); CmdArgs.push_back("-no-mips16"); } } Args.AddLastArg(CmdArgs, options::OPT_mmicromips, options::OPT_mno_micromips); Args.AddLastArg(CmdArgs, options::OPT_mdsp, options::OPT_mno_dsp); Args.AddLastArg(CmdArgs, options::OPT_mdspr2, options::OPT_mno_dspr2); if (Arg *A = Args.getLastArg(options::OPT_mmsa, options::OPT_mno_msa)) { // Do not use AddLastArg because not all versions of MIPS assembler // support -mmsa / -mno-msa options. if (A->getOption().matches(options::OPT_mmsa)) CmdArgs.push_back(Args.MakeArgString("-mmsa")); } Args.AddLastArg(CmdArgs, options::OPT_mhard_float, options::OPT_msoft_float); Args.AddLastArg(CmdArgs, options::OPT_mdouble_float, options::OPT_msingle_float); Args.AddLastArg(CmdArgs, options::OPT_modd_spreg, options::OPT_mno_odd_spreg); AddAssemblerKPIC(getToolChain(), Args, CmdArgs); break; } case llvm::Triple::systemz: { // Always pass an -march option, since our default of z10 is later // than the GNU assembler's default. 
std::string CPUName = systemz::getSystemZTargetCPU(Args); CmdArgs.push_back(Args.MakeArgString("-march=" + CPUName)); break; } case llvm::Triple::ve: DefaultAssembler = "nas"; } for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ, options::OPT_fdebug_prefix_map_EQ)) { StringRef Map = A->getValue(); if (!Map.contains('=')) D.Diag(diag::err_drv_invalid_argument_to_option) << Map << A->getOption().getName(); else { CmdArgs.push_back(Args.MakeArgString("--debug-prefix-map")); CmdArgs.push_back(Args.MakeArgString(Map)); } A->claim(); } Args.AddAllArgs(CmdArgs, options::OPT_I); Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); for (const auto &II : Inputs) CmdArgs.push_back(II.getFilename()); if (Arg *A = Args.getLastArg(options::OPT_g_Flag, options::OPT_gN_Group, options::OPT_gdwarf_2, options::OPT_gdwarf_3, options::OPT_gdwarf_4, options::OPT_gdwarf_5, options::OPT_gdwarf)) if (!A->getOption().matches(options::OPT_g0)) { Args.AddLastArg(CmdArgs, options::OPT_g_Flag); unsigned DwarfVersion = getDwarfVersion(getToolChain(), Args); CmdArgs.push_back(Args.MakeArgString("-gdwarf-" + Twine(DwarfVersion))); } const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(DefaultAssembler)); C.addCommand(std::make_unique(JA, *this, ResponseFileSupport::AtFileCurCP(), Exec, CmdArgs, Inputs, Output)); // Handle the debug info splitting at object creation time if we're // creating an object. // TODO: Currently only works on linux with newer objcopy. if (Args.hasArg(options::OPT_gsplit_dwarf) && getToolChain().getTriple().isOSLinux()) SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output, SplitDebugName(JA, Args, Inputs[0], Output)); } namespace { // Filter to remove Multilibs that don't exist as a suffix to Path class FilterNonExistent { StringRef Base, File; llvm::vfs::FileSystem &VFS; public: FilterNonExistent(StringRef Base, StringRef File, llvm::vfs::FileSystem &VFS) : Base(Base), File(File), VFS(VFS) {} bool operator()(const Multilib &M) { return !VFS.exists(Base + M.gccSuffix() + File); } }; } // end anonymous namespace static bool isSoftFloatABI(const ArgList &Args) { Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float, options::OPT_mfloat_abi_EQ); if (!A) return false; return A->getOption().matches(options::OPT_msoft_float) || (A->getOption().matches(options::OPT_mfloat_abi_EQ) && A->getValue() == StringRef("soft")); } static bool isArmOrThumbArch(llvm::Triple::ArchType Arch) { return Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb; } static bool isMipsEL(llvm::Triple::ArchType Arch) { return Arch == llvm::Triple::mipsel || Arch == llvm::Triple::mips64el; } static bool isMips16(const ArgList &Args) { Arg *A = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16); return A && A->getOption().matches(options::OPT_mips16); } static bool isMicroMips(const ArgList &Args) { Arg *A = Args.getLastArg(options::OPT_mmicromips, options::OPT_mno_micromips); return A && A->getOption().matches(options::OPT_mmicromips); } static bool isMSP430(llvm::Triple::ArchType Arch) { return Arch == llvm::Triple::msp430; } static bool findMipsCsMultilibs(const Multilib::flags_list &Flags, FilterNonExistent &NonExistent, DetectedMultilibs &Result) { // Check for Code Sourcery toolchain multilibs MultilibSet CSMipsMultilibs; { auto MArchMips16 = MultilibBuilder("/mips16").flag("-m32").flag("-mips16"); auto MArchMicroMips = 
MultilibBuilder("/micromips").flag("-m32").flag("-mmicromips"); auto MArchDefault = MultilibBuilder("") .flag("-mips16", /*Disallow=*/true) .flag("-mmicromips", /*Disallow=*/true); auto UCLibc = MultilibBuilder("/uclibc").flag("-muclibc"); auto SoftFloat = MultilibBuilder("/soft-float").flag("-msoft-float"); auto Nan2008 = MultilibBuilder("/nan2008").flag("-mnan=2008"); auto DefaultFloat = MultilibBuilder("") .flag("-msoft-float", /*Disallow=*/true) .flag("-mnan=2008", /*Disallow=*/true); auto BigEndian = MultilibBuilder("").flag("-EB").flag("-EL", /*Disallow=*/true); auto LittleEndian = MultilibBuilder("/el").flag("-EL").flag("-EB", /*Disallow=*/true); // Note that this one's osSuffix is "" auto MAbi64 = MultilibBuilder("") .gccSuffix("/64") .includeSuffix("/64") .flag("-mabi=n64") .flag("-mabi=n32", /*Disallow=*/true) .flag("-m32", /*Disallow=*/true); CSMipsMultilibs = MultilibSetBuilder() .Either(MArchMips16, MArchMicroMips, MArchDefault) .Maybe(UCLibc) .Either(SoftFloat, Nan2008, DefaultFloat) .FilterOut("/micromips/nan2008") .FilterOut("/mips16/nan2008") .Either(BigEndian, LittleEndian) .Maybe(MAbi64) .FilterOut("/mips16.*/64") .FilterOut("/micromips.*/64") .makeMultilibSet() .FilterOut(NonExistent) .setIncludeDirsCallback([](const Multilib &M) { std::vector Dirs({"/include"}); if (StringRef(M.includeSuffix()).starts_with("/uclibc")) Dirs.push_back( "/../../../../mips-linux-gnu/libc/uclibc/usr/include"); else Dirs.push_back("/../../../../mips-linux-gnu/libc/usr/include"); return Dirs; }); } MultilibSet DebianMipsMultilibs; { MultilibBuilder MAbiN32 = MultilibBuilder().gccSuffix("/n32").includeSuffix("/n32").flag( "-mabi=n32"); MultilibBuilder M64 = MultilibBuilder() .gccSuffix("/64") .includeSuffix("/64") .flag("-m64") .flag("-m32", /*Disallow=*/true) .flag("-mabi=n32", /*Disallow=*/true); MultilibBuilder M32 = MultilibBuilder() .gccSuffix("/32") .flag("-m64", /*Disallow=*/true) .flag("-m32") .flag("-mabi=n32", /*Disallow=*/true); DebianMipsMultilibs = MultilibSetBuilder() .Either(M32, M64, MAbiN32) .makeMultilibSet() .FilterOut(NonExistent); } // Sort candidates. Toolchain that best meets the directories tree goes first. // Then select the first toolchains matches command line flags. 
MultilibSet *Candidates[] = {&CSMipsMultilibs, &DebianMipsMultilibs}; if (CSMipsMultilibs.size() < DebianMipsMultilibs.size()) std::iter_swap(Candidates, Candidates + 1); for (const MultilibSet *Candidate : Candidates) { if (Candidate->select(Flags, Result.SelectedMultilibs)) { if (Candidate == &DebianMipsMultilibs) Result.BiarchSibling = Multilib(); Result.Multilibs = *Candidate; return true; } } return false; } static bool findMipsAndroidMultilibs(llvm::vfs::FileSystem &VFS, StringRef Path, const Multilib::flags_list &Flags, FilterNonExistent &NonExistent, DetectedMultilibs &Result) { MultilibSet AndroidMipsMultilibs = MultilibSetBuilder() .Maybe(MultilibBuilder("/mips-r2", {}, {}).flag("-march=mips32r2")) .Maybe(MultilibBuilder("/mips-r6", {}, {}).flag("-march=mips32r6")) .makeMultilibSet() .FilterOut(NonExistent); MultilibSet AndroidMipselMultilibs = MultilibSetBuilder() .Either(MultilibBuilder().flag("-march=mips32"), MultilibBuilder("/mips-r2", "", "/mips-r2") .flag("-march=mips32r2"), MultilibBuilder("/mips-r6", "", "/mips-r6") .flag("-march=mips32r6")) .makeMultilibSet() .FilterOut(NonExistent); MultilibSet AndroidMips64elMultilibs = MultilibSetBuilder() .Either(MultilibBuilder().flag("-march=mips64r6"), MultilibBuilder("/32/mips-r1", "", "/mips-r1") .flag("-march=mips32"), MultilibBuilder("/32/mips-r2", "", "/mips-r2") .flag("-march=mips32r2"), MultilibBuilder("/32/mips-r6", "", "/mips-r6") .flag("-march=mips32r6")) .makeMultilibSet() .FilterOut(NonExistent); MultilibSet *MS = &AndroidMipsMultilibs; if (VFS.exists(Path + "/mips-r6")) MS = &AndroidMipselMultilibs; else if (VFS.exists(Path + "/32")) MS = &AndroidMips64elMultilibs; if (MS->select(Flags, Result.SelectedMultilibs)) { Result.Multilibs = *MS; return true; } return false; } static bool findMipsMuslMultilibs(const Multilib::flags_list &Flags, FilterNonExistent &NonExistent, DetectedMultilibs &Result) { // Musl toolchain multilibs MultilibSet MuslMipsMultilibs; { auto MArchMipsR2 = MultilibBuilder("") .osSuffix("/mips-r2-hard-musl") .flag("-EB") .flag("-EL", /*Disallow=*/true) .flag("-march=mips32r2"); auto MArchMipselR2 = MultilibBuilder("/mipsel-r2-hard-musl") .flag("-EB", /*Disallow=*/true) .flag("-EL") .flag("-march=mips32r2"); MuslMipsMultilibs = MultilibSetBuilder() .Either(MArchMipsR2, MArchMipselR2) .makeMultilibSet(); // Specify the callback that computes the include directories. MuslMipsMultilibs.setIncludeDirsCallback([](const Multilib &M) { return std::vector( {"/../sysroot" + M.osSuffix() + "/usr/include"}); }); } if (MuslMipsMultilibs.select(Flags, Result.SelectedMultilibs)) { Result.Multilibs = MuslMipsMultilibs; return true; } return false; } static bool findMipsMtiMultilibs(const Multilib::flags_list &Flags, FilterNonExistent &NonExistent, DetectedMultilibs &Result) { // CodeScape MTI toolchain v1.2 and early. 
MultilibSet MtiMipsMultilibsV1; { auto MArchMips32 = MultilibBuilder("/mips32") .flag("-m32") .flag("-m64", /*Disallow=*/true) .flag("-mmicromips", /*Disallow=*/true) .flag("-march=mips32"); auto MArchMicroMips = MultilibBuilder("/micromips") .flag("-m32") .flag("-m64", /*Disallow=*/true) .flag("-mmicromips"); auto MArchMips64r2 = MultilibBuilder("/mips64r2") .flag("-m32", /*Disallow=*/true) .flag("-m64") .flag("-march=mips64r2"); auto MArchMips64 = MultilibBuilder("/mips64") .flag("-m32", /*Disallow=*/true) .flag("-m64") .flag("-march=mips64r2", /*Disallow=*/true); auto MArchDefault = MultilibBuilder("") .flag("-m32") .flag("-m64", /*Disallow=*/true) .flag("-mmicromips", /*Disallow=*/true) .flag("-march=mips32r2"); auto Mips16 = MultilibBuilder("/mips16").flag("-mips16"); auto UCLibc = MultilibBuilder("/uclibc").flag("-muclibc"); auto MAbi64 = MultilibBuilder("/64") .flag("-mabi=n64") .flag("-mabi=n32", /*Disallow=*/true) .flag("-m32", /*Disallow=*/true); auto BigEndian = MultilibBuilder("").flag("-EB").flag("-EL", /*Disallow=*/true); auto LittleEndian = MultilibBuilder("/el").flag("-EL").flag("-EB", /*Disallow=*/true); auto SoftFloat = MultilibBuilder("/sof").flag("-msoft-float"); auto Nan2008 = MultilibBuilder("/nan2008").flag("-mnan=2008"); MtiMipsMultilibsV1 = MultilibSetBuilder() .Either(MArchMips32, MArchMicroMips, MArchMips64r2, MArchMips64, MArchDefault) .Maybe(UCLibc) .Maybe(Mips16) .FilterOut("/mips64/mips16") .FilterOut("/mips64r2/mips16") .FilterOut("/micromips/mips16") .Maybe(MAbi64) .FilterOut("/micromips/64") .FilterOut("/mips32/64") .FilterOut("^/64") .FilterOut("/mips16/64") .Either(BigEndian, LittleEndian) .Maybe(SoftFloat) .Maybe(Nan2008) .FilterOut(".*sof/nan2008") .makeMultilibSet() .FilterOut(NonExistent) .setIncludeDirsCallback([](const Multilib &M) { std::vector Dirs({"/include"}); if (StringRef(M.includeSuffix()).starts_with("/uclibc")) Dirs.push_back("/../../../../sysroot/uclibc/usr/include"); else Dirs.push_back("/../../../../sysroot/usr/include"); return Dirs; }); } // CodeScape IMG toolchain starting from v1.3. 
MultilibSet MtiMipsMultilibsV2; { auto BeHard = MultilibBuilder("/mips-r2-hard") .flag("-EB") .flag("-msoft-float", /*Disallow=*/true) .flag("-mnan=2008", /*Disallow=*/true) .flag("-muclibc", /*Disallow=*/true); auto BeSoft = MultilibBuilder("/mips-r2-soft") .flag("-EB") .flag("-msoft-float") .flag("-mnan=2008", /*Disallow=*/true); auto ElHard = MultilibBuilder("/mipsel-r2-hard") .flag("-EL") .flag("-msoft-float", /*Disallow=*/true) .flag("-mnan=2008", /*Disallow=*/true) .flag("-muclibc", /*Disallow=*/true); auto ElSoft = MultilibBuilder("/mipsel-r2-soft") .flag("-EL") .flag("-msoft-float") .flag("-mnan=2008", /*Disallow=*/true) .flag("-mmicromips", /*Disallow=*/true); auto BeHardNan = MultilibBuilder("/mips-r2-hard-nan2008") .flag("-EB") .flag("-msoft-float", /*Disallow=*/true) .flag("-mnan=2008") .flag("-muclibc", /*Disallow=*/true); auto ElHardNan = MultilibBuilder("/mipsel-r2-hard-nan2008") .flag("-EL") .flag("-msoft-float", /*Disallow=*/true) .flag("-mnan=2008") .flag("-muclibc", /*Disallow=*/true) .flag("-mmicromips", /*Disallow=*/true); auto BeHardNanUclibc = MultilibBuilder("/mips-r2-hard-nan2008-uclibc") .flag("-EB") .flag("-msoft-float", /*Disallow=*/true) .flag("-mnan=2008") .flag("-muclibc"); auto ElHardNanUclibc = MultilibBuilder("/mipsel-r2-hard-nan2008-uclibc") .flag("-EL") .flag("-msoft-float", /*Disallow=*/true) .flag("-mnan=2008") .flag("-muclibc"); auto BeHardUclibc = MultilibBuilder("/mips-r2-hard-uclibc") .flag("-EB") .flag("-msoft-float", /*Disallow=*/true) .flag("-mnan=2008", /*Disallow=*/true) .flag("-muclibc"); auto ElHardUclibc = MultilibBuilder("/mipsel-r2-hard-uclibc") .flag("-EL") .flag("-msoft-float", /*Disallow=*/true) .flag("-mnan=2008", /*Disallow=*/true) .flag("-muclibc"); auto ElMicroHardNan = MultilibBuilder("/micromipsel-r2-hard-nan2008") .flag("-EL") .flag("-msoft-float", /*Disallow=*/true) .flag("-mnan=2008") .flag("-mmicromips"); auto ElMicroSoft = MultilibBuilder("/micromipsel-r2-soft") .flag("-EL") .flag("-msoft-float") .flag("-mnan=2008", /*Disallow=*/true) .flag("-mmicromips"); auto O32 = MultilibBuilder("/lib") .osSuffix("") .flag("-mabi=n32", /*Disallow=*/true) .flag("-mabi=n64", /*Disallow=*/true); auto N32 = MultilibBuilder("/lib32") .osSuffix("") .flag("-mabi=n32") .flag("-mabi=n64", /*Disallow=*/true); auto N64 = MultilibBuilder("/lib64") .osSuffix("") .flag("-mabi=n32", /*Disallow=*/true) .flag("-mabi=n64"); MtiMipsMultilibsV2 = MultilibSetBuilder() .Either({BeHard, BeSoft, ElHard, ElSoft, BeHardNan, ElHardNan, BeHardNanUclibc, ElHardNanUclibc, BeHardUclibc, ElHardUclibc, ElMicroHardNan, ElMicroSoft}) .Either(O32, N32, N64) .makeMultilibSet() .FilterOut(NonExistent) .setIncludeDirsCallback([](const Multilib &M) { return std::vector({"/../../../../sysroot" + M.includeSuffix() + "/../usr/include"}); }) .setFilePathsCallback([](const Multilib &M) { return std::vector( {"/../../../../mips-mti-linux-gnu/lib" + M.gccSuffix()}); }); } for (auto *Candidate : {&MtiMipsMultilibsV1, &MtiMipsMultilibsV2}) { if (Candidate->select(Flags, Result.SelectedMultilibs)) { Result.Multilibs = *Candidate; return true; } } return false; } static bool findMipsImgMultilibs(const Multilib::flags_list &Flags, FilterNonExistent &NonExistent, DetectedMultilibs &Result) { // CodeScape IMG toolchain v1.2 and early. 
MultilibSet ImgMultilibsV1; { auto Mips64r6 = MultilibBuilder("/mips64r6") .flag("-m64") .flag("-m32", /*Disallow=*/true); auto LittleEndian = MultilibBuilder("/el").flag("-EL").flag("-EB", /*Disallow=*/true); auto MAbi64 = MultilibBuilder("/64") .flag("-mabi=n64") .flag("-mabi=n32", /*Disallow=*/true) .flag("-m32", /*Disallow=*/true); ImgMultilibsV1 = MultilibSetBuilder() .Maybe(Mips64r6) .Maybe(MAbi64) .Maybe(LittleEndian) .makeMultilibSet() .FilterOut(NonExistent) .setIncludeDirsCallback([](const Multilib &M) { return std::vector( {"/include", "/../../../../sysroot/usr/include"}); }); } // CodeScape IMG toolchain starting from v1.3. MultilibSet ImgMultilibsV2; { auto BeHard = MultilibBuilder("/mips-r6-hard") .flag("-EB") .flag("-msoft-float", /*Disallow=*/true) .flag("-mmicromips", /*Disallow=*/true); auto BeSoft = MultilibBuilder("/mips-r6-soft") .flag("-EB") .flag("-msoft-float") .flag("-mmicromips", /*Disallow=*/true); auto ElHard = MultilibBuilder("/mipsel-r6-hard") .flag("-EL") .flag("-msoft-float", /*Disallow=*/true) .flag("-mmicromips", /*Disallow=*/true); auto ElSoft = MultilibBuilder("/mipsel-r6-soft") .flag("-EL") .flag("-msoft-float") .flag("-mmicromips", /*Disallow=*/true); auto BeMicroHard = MultilibBuilder("/micromips-r6-hard") .flag("-EB") .flag("-msoft-float", /*Disallow=*/true) .flag("-mmicromips"); auto BeMicroSoft = MultilibBuilder("/micromips-r6-soft") .flag("-EB") .flag("-msoft-float") .flag("-mmicromips"); auto ElMicroHard = MultilibBuilder("/micromipsel-r6-hard") .flag("-EL") .flag("-msoft-float", /*Disallow=*/true) .flag("-mmicromips"); auto ElMicroSoft = MultilibBuilder("/micromipsel-r6-soft") .flag("-EL") .flag("-msoft-float") .flag("-mmicromips"); auto O32 = MultilibBuilder("/lib") .osSuffix("") .flag("-mabi=n32", /*Disallow=*/true) .flag("-mabi=n64", /*Disallow=*/true); auto N32 = MultilibBuilder("/lib32") .osSuffix("") .flag("-mabi=n32") .flag("-mabi=n64", /*Disallow=*/true); auto N64 = MultilibBuilder("/lib64") .osSuffix("") .flag("-mabi=n32", /*Disallow=*/true) .flag("-mabi=n64"); ImgMultilibsV2 = MultilibSetBuilder() .Either({BeHard, BeSoft, ElHard, ElSoft, BeMicroHard, BeMicroSoft, ElMicroHard, ElMicroSoft}) .Either(O32, N32, N64) .makeMultilibSet() .FilterOut(NonExistent) .setIncludeDirsCallback([](const Multilib &M) { return std::vector({"/../../../../sysroot" + M.includeSuffix() + "/../usr/include"}); }) .setFilePathsCallback([](const Multilib &M) { return std::vector( {"/../../../../mips-img-linux-gnu/lib" + M.gccSuffix()}); }); } for (auto *Candidate : {&ImgMultilibsV1, &ImgMultilibsV2}) { if (Candidate->select(Flags, Result.SelectedMultilibs)) { Result.Multilibs = *Candidate; return true; } } return false; } bool clang::driver::findMIPSMultilibs(const Driver &D, const llvm::Triple &TargetTriple, StringRef Path, const ArgList &Args, DetectedMultilibs &Result) { FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS()); StringRef CPUName; StringRef ABIName; tools::mips::getMipsCPUAndABI(Args, TargetTriple, CPUName, ABIName); llvm::Triple::ArchType TargetArch = TargetTriple.getArch(); Multilib::flags_list Flags; addMultilibFlag(TargetTriple.isMIPS32(), "-m32", Flags); addMultilibFlag(TargetTriple.isMIPS64(), "-m64", Flags); addMultilibFlag(isMips16(Args), "-mips16", Flags); addMultilibFlag(CPUName == "mips32", "-march=mips32", Flags); addMultilibFlag(CPUName == "mips32r2" || CPUName == "mips32r3" || CPUName == "mips32r5" || CPUName == "p5600", "-march=mips32r2", Flags); addMultilibFlag(CPUName == "mips32r6", "-march=mips32r6", Flags); 
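  // Note on the Flags list built here: addMultilibFlag is assumed to push
  // "-<flag>" when the condition is true and "!<flag>" when it is false, and
  // MultilibBuilder::flag(..., /*Disallow=*/true) is assumed to store the same
  // "!<flag>" spelling, so MultilibSet::select matches a multilib only when
  // every flag it requires is enabled and every flag it disallows is off for
  // the current target.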
addMultilibFlag(CPUName == "mips64", "-march=mips64", Flags); addMultilibFlag(CPUName == "mips64r2" || CPUName == "mips64r3" || CPUName == "mips64r5" || CPUName == "octeon" || CPUName == "octeon+", "-march=mips64r2", Flags); addMultilibFlag(CPUName == "mips64r6", "-march=mips64r6", Flags); addMultilibFlag(isMicroMips(Args), "-mmicromips", Flags); addMultilibFlag(tools::mips::isUCLibc(Args), "-muclibc", Flags); addMultilibFlag(tools::mips::isNaN2008(D, Args, TargetTriple), "-mnan=2008", Flags); addMultilibFlag(ABIName == "n32", "-mabi=n32", Flags); addMultilibFlag(ABIName == "n64", "-mabi=n64", Flags); addMultilibFlag(isSoftFloatABI(Args), "-msoft-float", Flags); addMultilibFlag(!isSoftFloatABI(Args), "-mhard-float", Flags); addMultilibFlag(isMipsEL(TargetArch), "-EL", Flags); addMultilibFlag(!isMipsEL(TargetArch), "-EB", Flags); if (TargetTriple.isAndroid()) return findMipsAndroidMultilibs(D.getVFS(), Path, Flags, NonExistent, Result); if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies && TargetTriple.getOS() == llvm::Triple::Linux && TargetTriple.getEnvironment() == llvm::Triple::UnknownEnvironment) return findMipsMuslMultilibs(Flags, NonExistent, Result); if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies && TargetTriple.getOS() == llvm::Triple::Linux && TargetTriple.isGNUEnvironment()) return findMipsMtiMultilibs(Flags, NonExistent, Result); if (TargetTriple.getVendor() == llvm::Triple::ImaginationTechnologies && TargetTriple.getOS() == llvm::Triple::Linux && TargetTriple.isGNUEnvironment()) return findMipsImgMultilibs(Flags, NonExistent, Result); if (findMipsCsMultilibs(Flags, NonExistent, Result)) return true; // Fallback to the regular toolchain-tree structure. Multilib Default; Result.Multilibs.push_back(Default); Result.Multilibs.FilterOut(NonExistent); if (Result.Multilibs.select(Flags, Result.SelectedMultilibs)) { Result.BiarchSibling = Multilib(); return true; } return false; } static void findAndroidArmMultilibs(const Driver &D, const llvm::Triple &TargetTriple, StringRef Path, const ArgList &Args, DetectedMultilibs &Result) { // Find multilibs with subdirectories like armv7-a, thumb, armv7-a/thumb. 
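  // The four candidate directories are keyed on just two flags, -march=armv7-a
  // and -mthumb; the target triple and any -march=/-mthumb arguments are
  // folded into those two flags before selection.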
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS()); MultilibBuilder ArmV7Multilib = MultilibBuilder("/armv7-a") .flag("-march=armv7-a") .flag("-mthumb", /*Disallow=*/true); MultilibBuilder ThumbMultilib = MultilibBuilder("/thumb") .flag("-march=armv7-a", /*Disallow=*/true) .flag("-mthumb"); MultilibBuilder ArmV7ThumbMultilib = MultilibBuilder("/armv7-a/thumb").flag("-march=armv7-a").flag("-mthumb"); MultilibBuilder DefaultMultilib = MultilibBuilder("") .flag("-march=armv7-a", /*Disallow=*/true) .flag("-mthumb", /*Disallow=*/true); MultilibSet AndroidArmMultilibs = MultilibSetBuilder() .Either(ThumbMultilib, ArmV7Multilib, ArmV7ThumbMultilib, DefaultMultilib) .makeMultilibSet() .FilterOut(NonExistent); Multilib::flags_list Flags; llvm::StringRef Arch = Args.getLastArgValue(options::OPT_march_EQ); bool IsArmArch = TargetTriple.getArch() == llvm::Triple::arm; bool IsThumbArch = TargetTriple.getArch() == llvm::Triple::thumb; bool IsV7SubArch = TargetTriple.getSubArch() == llvm::Triple::ARMSubArch_v7; bool IsThumbMode = IsThumbArch || Args.hasFlag(options::OPT_mthumb, options::OPT_mno_thumb, false) || (IsArmArch && llvm::ARM::parseArchISA(Arch) == llvm::ARM::ISAKind::THUMB); bool IsArmV7Mode = (IsArmArch || IsThumbArch) && (llvm::ARM::parseArchVersion(Arch) == 7 || (IsArmArch && Arch == "" && IsV7SubArch)); addMultilibFlag(IsArmV7Mode, "-march=armv7-a", Flags); addMultilibFlag(IsThumbMode, "-mthumb", Flags); if (AndroidArmMultilibs.select(Flags, Result.SelectedMultilibs)) Result.Multilibs = AndroidArmMultilibs; } static bool findMSP430Multilibs(const Driver &D, const llvm::Triple &TargetTriple, StringRef Path, const ArgList &Args, DetectedMultilibs &Result) { FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS()); MultilibBuilder WithoutExceptions = MultilibBuilder("/430").flag("-exceptions", /*Disallow=*/true); MultilibBuilder WithExceptions = MultilibBuilder("/430/exceptions").flag("-exceptions"); // FIXME: when clang starts to support msp430x ISA additional logic // to select between multilib must be implemented // MultilibBuilder MSP430xMultilib = MultilibBuilder("/large"); Result.Multilibs.push_back(WithoutExceptions.makeMultilib()); Result.Multilibs.push_back(WithExceptions.makeMultilib()); Result.Multilibs.FilterOut(NonExistent); Multilib::flags_list Flags; addMultilibFlag(Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, false), "-exceptions", Flags); if (Result.Multilibs.select(Flags, Result.SelectedMultilibs)) return true; return false; } static void findCSKYMultilibs(const Driver &D, const llvm::Triple &TargetTriple, StringRef Path, const ArgList &Args, DetectedMultilibs &Result) { FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS()); tools::csky::FloatABI TheFloatABI = tools::csky::getCSKYFloatABI(D, Args); std::optional Res = tools::csky::getCSKYArchName(D, Args, TargetTriple); if (!Res) return; auto ARCHName = *Res; Multilib::flags_list Flags; addMultilibFlag(TheFloatABI == tools::csky::FloatABI::Hard, "-hard-fp", Flags); addMultilibFlag(TheFloatABI == tools::csky::FloatABI::SoftFP, "-soft-fp", Flags); addMultilibFlag(TheFloatABI == tools::csky::FloatABI::Soft, "-soft", Flags); addMultilibFlag(ARCHName == "ck801", "-march=ck801", Flags); addMultilibFlag(ARCHName == "ck802", "-march=ck802", Flags); addMultilibFlag(ARCHName == "ck803", "-march=ck803", Flags); addMultilibFlag(ARCHName == "ck804", "-march=ck804", Flags); addMultilibFlag(ARCHName == "ck805", "-march=ck805", Flags); addMultilibFlag(ARCHName == "ck807", "-march=ck807", 
Flags); addMultilibFlag(ARCHName == "ck810", "-march=ck810", Flags); addMultilibFlag(ARCHName == "ck810v", "-march=ck810v", Flags); addMultilibFlag(ARCHName == "ck860", "-march=ck860", Flags); addMultilibFlag(ARCHName == "ck860v", "-march=ck860v", Flags); bool isBigEndian = false; if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian, options::OPT_mbig_endian)) isBigEndian = !A->getOption().matches(options::OPT_mlittle_endian); addMultilibFlag(isBigEndian, "-EB", Flags); auto HardFloat = MultilibBuilder("/hard-fp").flag("-hard-fp"); auto SoftFpFloat = MultilibBuilder("/soft-fp").flag("-soft-fp"); auto SoftFloat = MultilibBuilder("").flag("-soft"); auto Arch801 = MultilibBuilder("/ck801").flag("-march=ck801"); auto Arch802 = MultilibBuilder("/ck802").flag("-march=ck802"); auto Arch803 = MultilibBuilder("/ck803").flag("-march=ck803"); // CK804 use the same library as CK803 auto Arch804 = MultilibBuilder("/ck803").flag("-march=ck804"); auto Arch805 = MultilibBuilder("/ck805").flag("-march=ck805"); auto Arch807 = MultilibBuilder("/ck807").flag("-march=ck807"); auto Arch810 = MultilibBuilder("").flag("-march=ck810"); auto Arch810v = MultilibBuilder("/ck810v").flag("-march=ck810v"); auto Arch860 = MultilibBuilder("/ck860").flag("-march=ck860"); auto Arch860v = MultilibBuilder("/ck860v").flag("-march=ck860v"); auto BigEndian = MultilibBuilder("/big").flag("-EB"); MultilibSet CSKYMultilibs = MultilibSetBuilder() .Maybe(BigEndian) .Either({Arch801, Arch802, Arch803, Arch804, Arch805, Arch807, Arch810, Arch810v, Arch860, Arch860v}) .Either(HardFloat, SoftFpFloat, SoftFloat) .makeMultilibSet() .FilterOut(NonExistent); if (CSKYMultilibs.select(Flags, Result.SelectedMultilibs)) Result.Multilibs = CSKYMultilibs; } /// Extend the multi-lib re-use selection mechanism for RISC-V. /// This function will try to re-use multi-lib if they are compatible. /// Definition of compatible: /// - ABI must be the same. /// - multi-lib is a subset of current arch, e.g. multi-lib=march=rv32im /// is a subset of march=rv32imc. /// - march that contains atomic extension can't reuse multi-lib that /// doesn't have atomic, vice versa. e.g. multi-lib=march=rv32im and /// march=rv32ima are not compatible, because software and hardware /// atomic operation can't work together correctly. static bool selectRISCVMultilib(const MultilibSet &RISCVMultilibSet, StringRef Arch, const Multilib::flags_list &Flags, llvm::SmallVectorImpl &SelectedMultilibs) { // Try to find the perfect matching multi-lib first. if (RISCVMultilibSet.select(Flags, SelectedMultilibs)) return true; Multilib::flags_list NewFlags; std::vector NewMultilibs; llvm::Expected> ParseResult = llvm::RISCVISAInfo::parseArchString( Arch, /*EnableExperimentalExtension=*/true, /*ExperimentalExtensionVersionCheck=*/false); // Ignore any error here, we assume it will be handled in another place. if (llvm::errorToBool(ParseResult.takeError())) return false; auto &ISAInfo = *ParseResult; addMultilibFlag(ISAInfo->getXLen() == 32, "-m32", NewFlags); addMultilibFlag(ISAInfo->getXLen() == 64, "-m64", NewFlags); // Collect all flags except march=* for (StringRef Flag : Flags) { if (Flag.starts_with("!march=") || Flag.starts_with("-march=")) continue; NewFlags.push_back(Flag.str()); } llvm::StringSet<> AllArchExts; // Reconstruct multi-lib list, and break march option into separated // extension. e.g. 
march=rv32im -> +i +m
  for (const auto &M : RISCVMultilibSet) {
    bool Skip = false;
    MultilibBuilder NewMultilib =
        MultilibBuilder(M.gccSuffix(), M.osSuffix(), M.includeSuffix());
    for (StringRef Flag : M.flags()) {
      // Add back all flags except -march.
      if (!Flag.consume_front("-march=")) {
        NewMultilib.flag(Flag);
        continue;
      }

      // Break down -march into individual extensions.
      llvm::Expected<std::unique_ptr<llvm::RISCVISAInfo>> MLConfigParseResult =
          llvm::RISCVISAInfo::parseArchString(
              Flag, /*EnableExperimentalExtension=*/true,
              /*ExperimentalExtensionVersionCheck=*/false);
      // Ignore any error here; we assume it will be handled in another place.
      if (llvm::errorToBool(MLConfigParseResult.takeError())) {
        // We might get a parsing error if rv32e is in the list; we can just
        // skip that and process the rest of the multi-lib configs.
        Skip = true;
        continue;
      }
      auto &MLConfigISAInfo = *MLConfigParseResult;

      for (auto &MLConfigArchExt : MLConfigISAInfo->getExtensions()) {
        auto ExtName = MLConfigArchExt.first;
        NewMultilib.flag(Twine("-", ExtName).str());

        if (AllArchExts.insert(ExtName).second) {
          addMultilibFlag(ISAInfo->hasExtension(ExtName),
                          Twine("-", ExtName).str(), NewFlags);
        }
      }

      // Check the XLEN explicitly.
      if (MLConfigISAInfo->getXLen() == 32) {
        NewMultilib.flag("-m32");
        NewMultilib.flag("-m64", /*Disallow*/ true);
      } else {
        NewMultilib.flag("-m32", /*Disallow*/ true);
        NewMultilib.flag("-m64");
      }

      // The atomic extension must be checked explicitly: software and hardware
      // atomic operations never work together correctly.
      if (!MLConfigISAInfo->hasExtension("a"))
        NewMultilib.flag("-a", /*Disallow*/ true);
    }
    if (Skip)
      continue;

    NewMultilibs.emplace_back(NewMultilib);
  }

  // Build an internal-use-only multi-lib list, used for checking any
  // compatible multi-lib.
  MultilibSet NewRISCVMultilibs =
      MultilibSetBuilder().Either(NewMultilibs).makeMultilibSet();

  if (NewRISCVMultilibs.select(NewFlags, SelectedMultilibs))
    for (const Multilib &NewSelectedM : SelectedMultilibs)
      for (const auto &M : RISCVMultilibSet)
        // Look up the corresponding multi-lib entry in the original multi-lib
        // set.
        if (M.gccSuffix() == NewSelectedM.gccSuffix())
          return true;

  return false;
}

static void findRISCVBareMetalMultilibs(const Driver &D,
                                        const llvm::Triple &TargetTriple,
                                        StringRef Path, const ArgList &Args,
                                        DetectedMultilibs &Result) {
  FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
  struct RiscvMultilib {
    StringRef march;
    StringRef mabi;
  };
  // Currently we only support the set of multilibs that riscv-gnu-toolchain
  // does.
// TODO: support MULTILIB_REUSE constexpr RiscvMultilib RISCVMultilibSet[] = { {"rv32i", "ilp32"}, {"rv32im", "ilp32"}, {"rv32iac", "ilp32"}, {"rv32imac", "ilp32"}, {"rv32imafc", "ilp32f"}, {"rv64imac", "lp64"}, {"rv64imafdc", "lp64d"}}; std::vector Ms; for (auto Element : RISCVMultilibSet) { // multilib path rule is ${march}/${mabi} Ms.emplace_back( MultilibBuilder( (Twine(Element.march) + "/" + Twine(Element.mabi)).str()) .flag(Twine("-march=", Element.march).str()) .flag(Twine("-mabi=", Element.mabi).str())); } MultilibSet RISCVMultilibs = MultilibSetBuilder() .Either(Ms) .makeMultilibSet() .FilterOut(NonExistent) .setFilePathsCallback([](const Multilib &M) { return std::vector( {M.gccSuffix(), "/../../../../riscv64-unknown-elf/lib" + M.gccSuffix(), "/../../../../riscv32-unknown-elf/lib" + M.gccSuffix()}); }); Multilib::flags_list Flags; llvm::StringSet<> Added_ABIs; StringRef ABIName = tools::riscv::getRISCVABI(Args, TargetTriple); std::string MArch = tools::riscv::getRISCVArch(Args, TargetTriple); for (auto Element : RISCVMultilibSet) { addMultilibFlag(MArch == Element.march, Twine("-march=", Element.march).str().c_str(), Flags); if (!Added_ABIs.count(Element.mabi)) { Added_ABIs.insert(Element.mabi); addMultilibFlag(ABIName == Element.mabi, Twine("-mabi=", Element.mabi).str().c_str(), Flags); } } if (selectRISCVMultilib(RISCVMultilibs, MArch, Flags, Result.SelectedMultilibs)) Result.Multilibs = RISCVMultilibs; } static void findRISCVMultilibs(const Driver &D, const llvm::Triple &TargetTriple, StringRef Path, const ArgList &Args, DetectedMultilibs &Result) { if (TargetTriple.getOS() == llvm::Triple::UnknownOS) return findRISCVBareMetalMultilibs(D, TargetTriple, Path, Args, Result); FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS()); MultilibBuilder Ilp32 = MultilibBuilder("lib32/ilp32").flag("-m32").flag("-mabi=ilp32"); MultilibBuilder Ilp32f = MultilibBuilder("lib32/ilp32f").flag("-m32").flag("-mabi=ilp32f"); MultilibBuilder Ilp32d = MultilibBuilder("lib32/ilp32d").flag("-m32").flag("-mabi=ilp32d"); MultilibBuilder Lp64 = MultilibBuilder("lib64/lp64").flag("-m64").flag("-mabi=lp64"); MultilibBuilder Lp64f = MultilibBuilder("lib64/lp64f").flag("-m64").flag("-mabi=lp64f"); MultilibBuilder Lp64d = MultilibBuilder("lib64/lp64d").flag("-m64").flag("-mabi=lp64d"); MultilibSet RISCVMultilibs = MultilibSetBuilder() .Either({Ilp32, Ilp32f, Ilp32d, Lp64, Lp64f, Lp64d}) .makeMultilibSet() .FilterOut(NonExistent); Multilib::flags_list Flags; bool IsRV64 = TargetTriple.getArch() == llvm::Triple::riscv64; StringRef ABIName = tools::riscv::getRISCVABI(Args, TargetTriple); addMultilibFlag(!IsRV64, "-m32", Flags); addMultilibFlag(IsRV64, "-m64", Flags); addMultilibFlag(ABIName == "ilp32", "-mabi=ilp32", Flags); addMultilibFlag(ABIName == "ilp32f", "-mabi=ilp32f", Flags); addMultilibFlag(ABIName == "ilp32d", "-mabi=ilp32d", Flags); addMultilibFlag(ABIName == "lp64", "-mabi=lp64", Flags); addMultilibFlag(ABIName == "lp64f", "-mabi=lp64f", Flags); addMultilibFlag(ABIName == "lp64d", "-mabi=lp64d", Flags); if (RISCVMultilibs.select(Flags, Result.SelectedMultilibs)) Result.Multilibs = RISCVMultilibs; } static bool findBiarchMultilibs(const Driver &D, const llvm::Triple &TargetTriple, StringRef Path, const ArgList &Args, bool NeedsBiarchSuffix, DetectedMultilibs &Result) { MultilibBuilder DefaultBuilder; // Some versions of SUSE and Fedora on ppc64 put 32-bit libs // in what would normally be GCCInstallPath and put the 64-bit // libs in a subdirectory named 64. 
The simple logic we follow is that // *if* there is a subdirectory of the right name with crtbegin.o in it, // we use that. If not, and if not a biarch triple alias, we look for // crtbegin.o without the subdirectory. StringRef Suff64 = "/64"; // Solaris uses platform-specific suffixes instead of /64. if (TargetTriple.isOSSolaris()) { switch (TargetTriple.getArch()) { case llvm::Triple::x86: case llvm::Triple::x86_64: Suff64 = "/amd64"; break; case llvm::Triple::sparc: case llvm::Triple::sparcv9: Suff64 = "/sparcv9"; break; default: break; } } Multilib Alt64 = MultilibBuilder() .gccSuffix(Suff64) .includeSuffix(Suff64) .flag("-m32", /*Disallow=*/true) .flag("-m64") .flag("-mx32", /*Disallow=*/true) .makeMultilib(); Multilib Alt32 = MultilibBuilder() .gccSuffix("/32") .includeSuffix("/32") .flag("-m32") .flag("-m64", /*Disallow=*/true) .flag("-mx32", /*Disallow=*/true) .makeMultilib(); Multilib Altx32 = MultilibBuilder() .gccSuffix("/x32") .includeSuffix("/x32") .flag("-m32", /*Disallow=*/true) .flag("-m64", /*Disallow=*/true) .flag("-mx32") .makeMultilib(); Multilib Alt32sparc = MultilibBuilder() .gccSuffix("/sparcv8plus") .includeSuffix("/sparcv8plus") .flag("-m32") .flag("-m64", /*Disallow=*/true) .makeMultilib(); // GCC toolchain for IAMCU doesn't have crtbegin.o, so look for libgcc.a. FilterNonExistent NonExistent( Path, TargetTriple.isOSIAMCU() ? "/libgcc.a" : "/crtbegin.o", D.getVFS()); // Determine default multilib from: 32, 64, x32 // Also handle cases such as 64 on 32, 32 on 64, etc. enum { UNKNOWN, WANT32, WANT64, WANTX32 } Want = UNKNOWN; const bool IsX32 = TargetTriple.isX32(); if (TargetTriple.isArch32Bit() && !NonExistent(Alt32)) Want = WANT64; if (TargetTriple.isArch32Bit() && !NonExistent(Alt32sparc)) Want = WANT64; else if (TargetTriple.isArch64Bit() && IsX32 && !NonExistent(Altx32)) Want = WANT64; else if (TargetTriple.isArch64Bit() && !IsX32 && !NonExistent(Alt64)) Want = WANT32; else if (TargetTriple.isArch64Bit() && !NonExistent(Alt32sparc)) Want = WANT64; else { if (TargetTriple.isArch32Bit()) Want = NeedsBiarchSuffix ? WANT64 : WANT32; else if (IsX32) Want = NeedsBiarchSuffix ? WANT64 : WANTX32; else Want = NeedsBiarchSuffix ? WANT32 : WANT64; } if (Want == WANT32) DefaultBuilder.flag("-m32") .flag("-m64", /*Disallow=*/true) .flag("-mx32", /*Disallow=*/true); else if (Want == WANT64) DefaultBuilder.flag("-m32", /*Disallow=*/true) .flag("-m64") .flag("-mx32", /*Disallow=*/true); else if (Want == WANTX32) DefaultBuilder.flag("-m32", /*Disallow=*/true) .flag("-m64", /*Disallow=*/true) .flag("-mx32"); else return false; Multilib Default = DefaultBuilder.makeMultilib(); Result.Multilibs.push_back(Default); Result.Multilibs.push_back(Alt64); Result.Multilibs.push_back(Alt32); Result.Multilibs.push_back(Altx32); Result.Multilibs.push_back(Alt32sparc); Result.Multilibs.FilterOut(NonExistent); Multilib::flags_list Flags; addMultilibFlag(TargetTriple.isArch64Bit() && !IsX32, "-m64", Flags); addMultilibFlag(TargetTriple.isArch32Bit(), "-m32", Flags); addMultilibFlag(TargetTriple.isArch64Bit() && IsX32, "-mx32", Flags); if (!Result.Multilibs.select(Flags, Result.SelectedMultilibs)) return false; if (Result.SelectedMultilibs.back() == Alt64 || Result.SelectedMultilibs.back() == Alt32 || Result.SelectedMultilibs.back() == Altx32 || Result.SelectedMultilibs.back() == Alt32sparc) Result.BiarchSibling = Default; return true; } /// Generic_GCC - A tool chain using the 'gcc' command to perform /// all subcommands; this relies on gcc translating the majority of /// command line options. 
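// The version ordering implemented below treats missing components and empty
// patch suffixes as newer. A minimal illustrative sketch, kept inactive and
// assuming only the Parse/isOlderThan behavior documented here (the helper
// name is hypothetical, not part of this file):
#if 0
static void gccVersionOrderingSketch() {
  using GCCVersion = Generic_GCC::GCCVersion;
  // "4.4.2-rc4" parses as Major=4, Minor=4, Patch=2, PatchSuffix="-rc4".
  GCCVersion RC = GCCVersion::Parse("4.4.2-rc4");
  // Empty patch suffixes sort higher, so the release candidate is older than
  // the plain 4.4.2 release.
  bool RCIsOlder = RC.isOlderThan(4, 4, 2, "");
  // A bare major version such as "5" is newer than any 4.x.y release.
  bool FourIsOlder = GCCVersion::Parse("4.9.3").isOlderThan(5, -1, -1, "");
  (void)RCIsOlder;   // true
  (void)FourIsOlder; // true
}
#endif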
/// Less-than for GCCVersion, implementing a Strict Weak Ordering. bool Generic_GCC::GCCVersion::isOlderThan(int RHSMajor, int RHSMinor, int RHSPatch, StringRef RHSPatchSuffix) const { if (Major != RHSMajor) return Major < RHSMajor; if (Minor != RHSMinor) { // Note that versions without a specified minor sort higher than those with // a minor. if (RHSMinor == -1) return true; if (Minor == -1) return false; return Minor < RHSMinor; } if (Patch != RHSPatch) { // Note that versions without a specified patch sort higher than those with // a patch. if (RHSPatch == -1) return true; if (Patch == -1) return false; // Otherwise just sort on the patch itself. return Patch < RHSPatch; } if (PatchSuffix != RHSPatchSuffix) { // Sort empty suffixes higher. if (RHSPatchSuffix.empty()) return true; if (PatchSuffix.empty()) return false; // Provide a lexicographic sort to make this a total ordering. return PatchSuffix < RHSPatchSuffix; } // The versions are equal. return false; } /// Parse a GCCVersion object out of a string of text. /// /// This is the primary means of forming GCCVersion objects. /*static*/ Generic_GCC::GCCVersion Generic_GCC::GCCVersion::Parse(StringRef VersionText) { const GCCVersion BadVersion = {VersionText.str(), -1, -1, -1, "", "", ""}; std::pair First = VersionText.split('.'); std::pair Second = First.second.split('.'); StringRef MajorStr = First.first; StringRef MinorStr = Second.first; StringRef PatchStr = Second.second; GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""}; // Parse version number strings such as: // 5 // 4.4 // 4.4-patched // 4.4.0 // 4.4.x // 4.4.2-rc4 // 4.4.x-patched // 10-win32 // Split on '.', handle 1, 2 or 3 such segments. Each segment must contain // purely a number, except for the last one, where a non-number suffix // is stored in PatchSuffix. The third segment is allowed to not contain // a number at all. auto TryParseLastNumber = [&](StringRef Segment, int &Number, std::string &OutStr) -> bool { // Look for a number prefix and parse that, and split out any trailing // string into GoodVersion.PatchSuffix. if (size_t EndNumber = Segment.find_first_not_of("0123456789")) { StringRef NumberStr = Segment.slice(0, EndNumber); if (NumberStr.getAsInteger(10, Number) || Number < 0) return false; OutStr = NumberStr; GoodVersion.PatchSuffix = Segment.substr(EndNumber); return true; } return false; }; auto TryParseNumber = [](StringRef Segment, int &Number) -> bool { if (Segment.getAsInteger(10, Number) || Number < 0) return false; return true; }; if (MinorStr.empty()) { // If no minor string, major is the last segment if (!TryParseLastNumber(MajorStr, GoodVersion.Major, GoodVersion.MajorStr)) return BadVersion; return GoodVersion; } if (!TryParseNumber(MajorStr, GoodVersion.Major)) return BadVersion; GoodVersion.MajorStr = MajorStr; if (PatchStr.empty()) { // If no patch string, minor is the last segment if (!TryParseLastNumber(MinorStr, GoodVersion.Minor, GoodVersion.MinorStr)) return BadVersion; return GoodVersion; } if (!TryParseNumber(MinorStr, GoodVersion.Minor)) return BadVersion; GoodVersion.MinorStr = MinorStr; // For the last segment, tolerate a missing number. std::string DummyStr; TryParseLastNumber(PatchStr, GoodVersion.Patch, DummyStr); return GoodVersion; } static llvm::StringRef getGCCToolchainDir(const ArgList &Args, llvm::StringRef SysRoot) { const Arg *A = Args.getLastArg(clang::driver::options::OPT_gcc_toolchain); if (A) return A->getValue(); // If we have a SysRoot, ignore GCC_INSTALL_PREFIX. 
// GCC_INSTALL_PREFIX specifies the gcc installation for the default // sysroot and is likely not valid with a different sysroot. if (!SysRoot.empty()) return ""; return GCC_INSTALL_PREFIX; } /// Initialize a GCCInstallationDetector from the driver. /// /// This performs all of the autodetection and sets up the various paths. /// Once constructed, a GCCInstallationDetector is essentially immutable. /// /// FIXME: We shouldn't need an explicit TargetTriple parameter here, and /// should instead pull the target out of the driver. This is currently /// necessary because the driver doesn't store the final version of the target /// triple. void Generic_GCC::GCCInstallationDetector::init( const llvm::Triple &TargetTriple, const ArgList &Args) { llvm::Triple BiarchVariantTriple = TargetTriple.isArch32Bit() ? TargetTriple.get64BitArchVariant() : TargetTriple.get32BitArchVariant(); // The library directories which may contain GCC installations. SmallVector CandidateLibDirs, CandidateBiarchLibDirs; // The compatible GCC triples for this particular architecture. SmallVector CandidateTripleAliases; SmallVector CandidateBiarchTripleAliases; // Add some triples that we want to check first. CandidateTripleAliases.push_back(TargetTriple.str()); std::string TripleNoVendor, BiarchTripleNoVendor; if (TargetTriple.getVendor() == llvm::Triple::UnknownVendor) { StringRef OSEnv = TargetTriple.getOSAndEnvironmentName(); if (TargetTriple.getEnvironment() == llvm::Triple::GNUX32) OSEnv = "linux-gnu"; TripleNoVendor = (TargetTriple.getArchName().str() + '-' + OSEnv).str(); CandidateTripleAliases.push_back(TripleNoVendor); if (BiarchVariantTriple.getArch() != llvm::Triple::UnknownArch) { BiarchTripleNoVendor = (BiarchVariantTriple.getArchName().str() + '-' + OSEnv).str(); CandidateBiarchTripleAliases.push_back(BiarchTripleNoVendor); } } CollectLibDirsAndTriples(TargetTriple, BiarchVariantTriple, CandidateLibDirs, CandidateTripleAliases, CandidateBiarchLibDirs, CandidateBiarchTripleAliases); // If --gcc-install-dir= is specified, skip filesystem detection. if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_gcc_install_dir_EQ); A && A->getValue()[0]) { StringRef InstallDir = A->getValue(); if (!ScanGCCForMultilibs(TargetTriple, Args, InstallDir, false)) { D.Diag(diag::err_drv_invalid_gcc_install_dir) << InstallDir; } else { (void)InstallDir.consume_back("/"); StringRef VersionText = llvm::sys::path::filename(InstallDir); StringRef TripleText = llvm::sys::path::filename(llvm::sys::path::parent_path(InstallDir)); Version = GCCVersion::Parse(VersionText); GCCTriple.setTriple(TripleText); GCCInstallPath = std::string(InstallDir); GCCParentLibPath = GCCInstallPath + "/../../.."; IsValid = true; } return; } // If --gcc-triple is specified use this instead of trying to // auto-detect a triple. if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_gcc_triple_EQ)) { StringRef GCCTriple = A->getValue(); CandidateTripleAliases.clear(); CandidateTripleAliases.push_back(GCCTriple); } // Compute the set of prefixes for our search. SmallVector Prefixes; StringRef GCCToolchainDir = getGCCToolchainDir(Args, D.SysRoot); if (GCCToolchainDir != "") { if (GCCToolchainDir.back() == '/') GCCToolchainDir = GCCToolchainDir.drop_back(); // remove the / Prefixes.push_back(std::string(GCCToolchainDir)); } else { // If we have a SysRoot, try that first. if (!D.SysRoot.empty()) { Prefixes.push_back(D.SysRoot); AddDefaultGCCPrefixes(TargetTriple, Prefixes, D.SysRoot); } // Then look for gcc installed alongside clang. 
Prefixes.push_back(D.Dir + "/.."); // Next, look for prefix(es) that correspond to distribution-supplied gcc // installations. if (D.SysRoot.empty()) { // Typically /usr. AddDefaultGCCPrefixes(TargetTriple, Prefixes, D.SysRoot); } // Try to respect gcc-config on Gentoo if --gcc-toolchain is not provided. // This avoids accidentally enforcing the system GCC version when using a // custom toolchain. SmallVector GentooTestTriples; // Try to match an exact triple as target triple first. // e.g. crossdev -S x86_64-gentoo-linux-gnu will install gcc libs for // x86_64-gentoo-linux-gnu. But "clang -target x86_64-gentoo-linux-gnu" // may pick the libraries for x86_64-pc-linux-gnu even when exact matching // triple x86_64-gentoo-linux-gnu is present. GentooTestTriples.push_back(TargetTriple.str()); GentooTestTriples.append(CandidateTripleAliases.begin(), CandidateTripleAliases.end()); if (ScanGentooConfigs(TargetTriple, Args, GentooTestTriples, CandidateBiarchTripleAliases)) return; } // Loop over the various components which exist and select the best GCC // installation available. GCC installs are ranked by version number. const GCCVersion VersionZero = GCCVersion::Parse("0.0.0"); Version = VersionZero; for (const std::string &Prefix : Prefixes) { auto &VFS = D.getVFS(); if (!VFS.exists(Prefix)) continue; for (StringRef Suffix : CandidateLibDirs) { const std::string LibDir = concat(Prefix, Suffix); if (!VFS.exists(LibDir)) continue; // Maybe filter out /gcc and /gcc-cross. bool GCCDirExists = VFS.exists(LibDir + "/gcc"); bool GCCCrossDirExists = VFS.exists(LibDir + "/gcc-cross"); for (StringRef Candidate : CandidateTripleAliases) ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, Candidate, false, GCCDirExists, GCCCrossDirExists); } for (StringRef Suffix : CandidateBiarchLibDirs) { const std::string LibDir = Prefix + Suffix.str(); if (!VFS.exists(LibDir)) continue; bool GCCDirExists = VFS.exists(LibDir + "/gcc"); bool GCCCrossDirExists = VFS.exists(LibDir + "/gcc-cross"); for (StringRef Candidate : CandidateBiarchTripleAliases) ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, Candidate, true, GCCDirExists, GCCCrossDirExists); } // Skip other prefixes once a GCC installation is found. if (Version > VersionZero) break; } } void Generic_GCC::GCCInstallationDetector::print(raw_ostream &OS) const { for (const auto &InstallPath : CandidateGCCInstallPaths) OS << "Found candidate GCC installation: " << InstallPath << "\n"; if (!GCCInstallPath.empty()) OS << "Selected GCC installation: " << GCCInstallPath << "\n"; for (const auto &Multilib : Multilibs) OS << "Candidate multilib: " << Multilib << "\n"; if (Multilibs.size() != 0 || !SelectedMultilib.isDefault()) OS << "Selected multilib: " << SelectedMultilib << "\n"; } bool Generic_GCC::GCCInstallationDetector::getBiarchSibling(Multilib &M) const { if (BiarchSibling) { M = *BiarchSibling; return true; } return false; } void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( const llvm::Triple &TargetTriple, SmallVectorImpl &Prefixes, StringRef SysRoot) { if (TargetTriple.isOSHaiku()) { Prefixes.push_back(concat(SysRoot, "/boot/system/develop/tools")); return; } if (TargetTriple.isOSSolaris()) { // Solaris is a special case. // The GCC installation is under // /usr/gcc/./lib/gcc//../ // so we need to find those /usr/gcc/*/lib/gcc libdirs and go with // /usr/gcc/ as a prefix. 
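    // For example, a hypothetical GCC 7.3.0 for sparcv9 would live under
    // /usr/gcc/7/lib/gcc/sparcv9-sun-solaris2.11/7.3.0, in which case
    // /usr/gcc/7 is the prefix recorded below.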
SmallVector, 8> SolarisPrefixes; std::string PrefixDir = concat(SysRoot, "/usr/gcc"); std::error_code EC; for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(PrefixDir, EC), LE; !EC && LI != LE; LI = LI.increment(EC)) { StringRef VersionText = llvm::sys::path::filename(LI->path()); GCCVersion CandidateVersion = GCCVersion::Parse(VersionText); // Filter out obviously bad entries. if (CandidateVersion.Major == -1 || CandidateVersion.isOlderThan(4, 1, 1)) continue; std::string CandidatePrefix = PrefixDir + "/" + VersionText.str(); std::string CandidateLibPath = CandidatePrefix + "/lib/gcc"; if (!D.getVFS().exists(CandidateLibPath)) continue; SolarisPrefixes.emplace_back( std::make_pair(CandidateVersion, CandidatePrefix)); } // Sort in reverse order so GCCInstallationDetector::init picks the latest. std::sort(SolarisPrefixes.rbegin(), SolarisPrefixes.rend()); for (auto p : SolarisPrefixes) Prefixes.emplace_back(p.second); return; } // For Linux, if --sysroot is not specified, look for RHEL/CentOS devtoolsets // and gcc-toolsets. if (SysRoot.empty() && TargetTriple.getOS() == llvm::Triple::Linux && D.getVFS().exists("/opt/rh")) { // TODO: We may want to remove this, since the functionality // can be achieved using config files. Prefixes.push_back("/opt/rh/gcc-toolset-12/root/usr"); Prefixes.push_back("/opt/rh/gcc-toolset-11/root/usr"); Prefixes.push_back("/opt/rh/gcc-toolset-10/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-12/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-11/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-10/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-9/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-8/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-7/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-6/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-4/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-3/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-2/root/usr"); } // Fall back to /usr which is used by most non-Solaris systems. Prefixes.push_back(concat(SysRoot, "/usr")); } /*static*/ void Generic_GCC::GCCInstallationDetector::CollectLibDirsAndTriples( const llvm::Triple &TargetTriple, const llvm::Triple &BiarchTriple, SmallVectorImpl &LibDirs, SmallVectorImpl &TripleAliases, SmallVectorImpl &BiarchLibDirs, SmallVectorImpl &BiarchTripleAliases) { // Declare a bunch of static data sets that we'll select between below. These // are specifically designed to always refer to string literals to avoid any // lifetime or initialization issues. // // The *Triples variables hard code some triples so that, for example, // --target=aarch64 (incomplete triple) can detect lib/aarch64-linux-gnu. // They are not needed when the user has correct LLVM_DEFAULT_TARGET_TRIPLE // and always uses the full --target (e.g. --target=aarch64-linux-gnu). The // lists should shrink over time. Please don't add more elements to *Triples. 
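// This function only assembles the candidate lists; init() above combines
// every search prefix with every entry in LibDirs/BiarchLibDirs and probes
// each triple alias via ScanLibDirForGCCTriple, which in turn looks for
// gcc/<triple>/<version> and gcc-cross/<triple>/<version> subdirectories.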
static const char *const AArch64LibDirs[] = {"/lib64", "/lib"}; static const char *const AArch64Triples[] = { "aarch64-none-linux-gnu", "aarch64-linux-gnu", "aarch64-redhat-linux", "aarch64-suse-linux"}; static const char *const AArch64beLibDirs[] = {"/lib"}; static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu"}; static const char *const ARMLibDirs[] = {"/lib"}; static const char *const ARMTriples[] = {"arm-linux-gnueabi"}; static const char *const ARMHFTriples[] = {"arm-linux-gnueabihf", "armv7hl-redhat-linux-gnueabi", "armv6hl-suse-linux-gnueabi", "armv7hl-suse-linux-gnueabi"}; static const char *const ARMebLibDirs[] = {"/lib"}; static const char *const ARMebTriples[] = {"armeb-linux-gnueabi"}; static const char *const ARMebHFTriples[] = { "armeb-linux-gnueabihf", "armebv7hl-redhat-linux-gnueabi"}; static const char *const AVRLibDirs[] = {"/lib"}; static const char *const AVRTriples[] = {"avr"}; static const char *const CSKYLibDirs[] = {"/lib"}; static const char *const CSKYTriples[] = { "csky-linux-gnuabiv2", "csky-linux-uclibcabiv2", "csky-elf-noneabiv2"}; static const char *const X86_64LibDirs[] = {"/lib64", "/lib"}; static const char *const X86_64Triples[] = { "x86_64-linux-gnu", "x86_64-unknown-linux-gnu", "x86_64-pc-linux-gnu", "x86_64-redhat-linux6E", "x86_64-redhat-linux", "x86_64-suse-linux", "x86_64-manbo-linux-gnu", "x86_64-slackware-linux", "x86_64-unknown-linux", "x86_64-amazon-linux"}; static const char *const X32Triples[] = {"x86_64-linux-gnux32", "x86_64-pc-linux-gnux32"}; static const char *const X32LibDirs[] = {"/libx32", "/lib"}; static const char *const X86LibDirs[] = {"/lib32", "/lib"}; static const char *const X86Triples[] = { "i586-linux-gnu", "i686-linux-gnu", "i686-pc-linux-gnu", "i386-redhat-linux6E", "i686-redhat-linux", "i386-redhat-linux", "i586-suse-linux", "i686-montavista-linux", }; static const char *const LoongArch64LibDirs[] = {"/lib64", "/lib"}; static const char *const LoongArch64Triples[] = { "loongarch64-linux-gnu", "loongarch64-unknown-linux-gnu"}; static const char *const M68kLibDirs[] = {"/lib"}; static const char *const M68kTriples[] = {"m68k-unknown-linux-gnu", "m68k-suse-linux"}; static const char *const MIPSLibDirs[] = {"/libo32", "/lib"}; static const char *const MIPSTriples[] = { "mips-linux-gnu", "mips-mti-linux", "mips-mti-linux-gnu", "mips-img-linux-gnu", "mipsisa32r6-linux-gnu"}; static const char *const MIPSELLibDirs[] = {"/libo32", "/lib"}; static const char *const MIPSELTriples[] = {"mipsel-linux-gnu", "mips-img-linux-gnu"}; static const char *const MIPS64LibDirs[] = {"/lib64", "/lib"}; static const char *const MIPS64Triples[] = { "mips-mti-linux-gnu", "mips-img-linux-gnu", "mips64-linux-gnuabi64", "mipsisa64r6-linux-gnu", "mipsisa64r6-linux-gnuabi64"}; static const char *const MIPS64ELLibDirs[] = {"/lib64", "/lib"}; static const char *const MIPS64ELTriples[] = { "mips-mti-linux-gnu", "mips-img-linux-gnu", "mips64el-linux-gnuabi64", "mipsisa64r6el-linux-gnu", "mipsisa64r6el-linux-gnuabi64"}; static const char *const MIPSN32LibDirs[] = {"/lib32"}; static const char *const MIPSN32Triples[] = {"mips64-linux-gnuabin32", "mipsisa64r6-linux-gnuabin32"}; static const char *const MIPSN32ELLibDirs[] = {"/lib32"}; static const char *const MIPSN32ELTriples[] = { "mips64el-linux-gnuabin32", "mipsisa64r6el-linux-gnuabin32"}; static const char *const MSP430LibDirs[] = {"/lib"}; static const char *const MSP430Triples[] = {"msp430-elf"}; static const char *const PPCLibDirs[] = {"/lib32", "/lib"}; static const char *const 
PPCTriples[] = { "powerpc-unknown-linux-gnu", // On 32-bit PowerPC systems running SUSE Linux, gcc is configured as a // 64-bit compiler which defaults to "-m32", hence "powerpc64-suse-linux". "powerpc64-suse-linux", "powerpc-montavista-linuxspe"}; static const char *const PPCLELibDirs[] = {"/lib32", "/lib"}; static const char *const PPCLETriples[] = {"powerpcle-unknown-linux-gnu", "powerpcle-linux-musl"}; static const char *const PPC64LibDirs[] = {"/lib64", "/lib"}; static const char *const PPC64Triples[] = {"powerpc64-unknown-linux-gnu", "powerpc64-suse-linux", "ppc64-redhat-linux"}; static const char *const PPC64LELibDirs[] = {"/lib64", "/lib"}; static const char *const PPC64LETriples[] = { "powerpc64le-unknown-linux-gnu", "powerpc64le-none-linux-gnu", "powerpc64le-suse-linux", "ppc64le-redhat-linux"}; static const char *const RISCV32LibDirs[] = {"/lib32", "/lib"}; static const char *const RISCV32Triples[] = {"riscv32-unknown-linux-gnu", "riscv32-unknown-elf"}; static const char *const RISCV64LibDirs[] = {"/lib64", "/lib"}; static const char *const RISCV64Triples[] = {"riscv64-unknown-linux-gnu", "riscv64-unknown-elf"}; static const char *const SPARCv8LibDirs[] = {"/lib32", "/lib"}; static const char *const SPARCv8Triples[] = {"sparc-linux-gnu", "sparcv8-linux-gnu"}; static const char *const SPARCv9LibDirs[] = {"/lib64", "/lib"}; static const char *const SPARCv9Triples[] = {"sparc64-linux-gnu", "sparcv9-linux-gnu"}; static const char *const SystemZLibDirs[] = {"/lib64", "/lib"}; static const char *const SystemZTriples[] = { "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu", "s390x-suse-linux", "s390x-redhat-linux"}; using std::begin; using std::end; if (TargetTriple.isOSSolaris()) { static const char *const SolarisLibDirs[] = {"/lib"}; static const char *const SolarisSparcV8Triples[] = { "sparc-sun-solaris2.11"}; static const char *const SolarisSparcV9Triples[] = { "sparcv9-sun-solaris2.11"}; static const char *const SolarisX86Triples[] = {"i386-pc-solaris2.11"}; static const char *const SolarisX86_64Triples[] = {"x86_64-pc-solaris2.11"}; LibDirs.append(begin(SolarisLibDirs), end(SolarisLibDirs)); BiarchLibDirs.append(begin(SolarisLibDirs), end(SolarisLibDirs)); switch (TargetTriple.getArch()) { case llvm::Triple::x86: TripleAliases.append(begin(SolarisX86Triples), end(SolarisX86Triples)); BiarchTripleAliases.append(begin(SolarisX86_64Triples), end(SolarisX86_64Triples)); break; case llvm::Triple::x86_64: TripleAliases.append(begin(SolarisX86_64Triples), end(SolarisX86_64Triples)); BiarchTripleAliases.append(begin(SolarisX86Triples), end(SolarisX86Triples)); break; case llvm::Triple::sparc: TripleAliases.append(begin(SolarisSparcV8Triples), end(SolarisSparcV8Triples)); BiarchTripleAliases.append(begin(SolarisSparcV9Triples), end(SolarisSparcV9Triples)); break; case llvm::Triple::sparcv9: TripleAliases.append(begin(SolarisSparcV9Triples), end(SolarisSparcV9Triples)); BiarchTripleAliases.append(begin(SolarisSparcV8Triples), end(SolarisSparcV8Triples)); break; default: break; } return; } // Android targets should not use GNU/Linux tools or libraries. 
if (TargetTriple.isAndroid()) { static const char *const AArch64AndroidTriples[] = { "aarch64-linux-android"}; static const char *const ARMAndroidTriples[] = {"arm-linux-androideabi"}; static const char *const X86AndroidTriples[] = {"i686-linux-android"}; static const char *const X86_64AndroidTriples[] = {"x86_64-linux-android"}; switch (TargetTriple.getArch()) { case llvm::Triple::aarch64: LibDirs.append(begin(AArch64LibDirs), end(AArch64LibDirs)); TripleAliases.append(begin(AArch64AndroidTriples), end(AArch64AndroidTriples)); break; case llvm::Triple::arm: case llvm::Triple::thumb: LibDirs.append(begin(ARMLibDirs), end(ARMLibDirs)); TripleAliases.append(begin(ARMAndroidTriples), end(ARMAndroidTriples)); break; case llvm::Triple::x86_64: LibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs)); TripleAliases.append(begin(X86_64AndroidTriples), end(X86_64AndroidTriples)); BiarchLibDirs.append(begin(X86LibDirs), end(X86LibDirs)); BiarchTripleAliases.append(begin(X86AndroidTriples), end(X86AndroidTriples)); break; case llvm::Triple::x86: LibDirs.append(begin(X86LibDirs), end(X86LibDirs)); TripleAliases.append(begin(X86AndroidTriples), end(X86AndroidTriples)); BiarchLibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs)); BiarchTripleAliases.append(begin(X86_64AndroidTriples), end(X86_64AndroidTriples)); break; default: break; } return; } if (TargetTriple.isOSHurd()) { switch (TargetTriple.getArch()) { case llvm::Triple::x86_64: LibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs)); TripleAliases.push_back("x86_64-gnu"); break; case llvm::Triple::x86: LibDirs.append(begin(X86LibDirs), end(X86LibDirs)); TripleAliases.push_back("i686-gnu"); break; default: break; } return; } switch (TargetTriple.getArch()) { case llvm::Triple::aarch64: LibDirs.append(begin(AArch64LibDirs), end(AArch64LibDirs)); TripleAliases.append(begin(AArch64Triples), end(AArch64Triples)); BiarchLibDirs.append(begin(AArch64LibDirs), end(AArch64LibDirs)); BiarchTripleAliases.append(begin(AArch64Triples), end(AArch64Triples)); break; case llvm::Triple::aarch64_be: LibDirs.append(begin(AArch64beLibDirs), end(AArch64beLibDirs)); TripleAliases.append(begin(AArch64beTriples), end(AArch64beTriples)); BiarchLibDirs.append(begin(AArch64beLibDirs), end(AArch64beLibDirs)); BiarchTripleAliases.append(begin(AArch64beTriples), end(AArch64beTriples)); break; case llvm::Triple::arm: case llvm::Triple::thumb: LibDirs.append(begin(ARMLibDirs), end(ARMLibDirs)); if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF || + TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHFT64 || TargetTriple.getEnvironment() == llvm::Triple::MuslEABIHF || TargetTriple.getEnvironment() == llvm::Triple::EABIHF) { TripleAliases.append(begin(ARMHFTriples), end(ARMHFTriples)); } else { TripleAliases.append(begin(ARMTriples), end(ARMTriples)); } break; case llvm::Triple::armeb: case llvm::Triple::thumbeb: LibDirs.append(begin(ARMebLibDirs), end(ARMebLibDirs)); if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF || + TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHFT64 || TargetTriple.getEnvironment() == llvm::Triple::MuslEABIHF || TargetTriple.getEnvironment() == llvm::Triple::EABIHF) { TripleAliases.append(begin(ARMebHFTriples), end(ARMebHFTriples)); } else { TripleAliases.append(begin(ARMebTriples), end(ARMebTriples)); } break; case llvm::Triple::avr: LibDirs.append(begin(AVRLibDirs), end(AVRLibDirs)); TripleAliases.append(begin(AVRTriples), end(AVRTriples)); break; case llvm::Triple::csky: LibDirs.append(begin(CSKYLibDirs), 
end(CSKYLibDirs)); TripleAliases.append(begin(CSKYTriples), end(CSKYTriples)); break; case llvm::Triple::x86_64: if (TargetTriple.isX32()) { LibDirs.append(begin(X32LibDirs), end(X32LibDirs)); TripleAliases.append(begin(X32Triples), end(X32Triples)); BiarchLibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs)); BiarchTripleAliases.append(begin(X86_64Triples), end(X86_64Triples)); } else { LibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs)); TripleAliases.append(begin(X86_64Triples), end(X86_64Triples)); BiarchLibDirs.append(begin(X32LibDirs), end(X32LibDirs)); BiarchTripleAliases.append(begin(X32Triples), end(X32Triples)); } BiarchLibDirs.append(begin(X86LibDirs), end(X86LibDirs)); BiarchTripleAliases.append(begin(X86Triples), end(X86Triples)); break; case llvm::Triple::x86: LibDirs.append(begin(X86LibDirs), end(X86LibDirs)); // MCU toolchain is 32 bit only and its triple alias is TargetTriple // itself, which will be appended below. if (!TargetTriple.isOSIAMCU()) { TripleAliases.append(begin(X86Triples), end(X86Triples)); BiarchLibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs)); BiarchTripleAliases.append(begin(X86_64Triples), end(X86_64Triples)); BiarchLibDirs.append(begin(X32LibDirs), end(X32LibDirs)); BiarchTripleAliases.append(begin(X32Triples), end(X32Triples)); } break; // TODO: Handle loongarch32. case llvm::Triple::loongarch64: LibDirs.append(begin(LoongArch64LibDirs), end(LoongArch64LibDirs)); TripleAliases.append(begin(LoongArch64Triples), end(LoongArch64Triples)); break; case llvm::Triple::m68k: LibDirs.append(begin(M68kLibDirs), end(M68kLibDirs)); TripleAliases.append(begin(M68kTriples), end(M68kTriples)); break; case llvm::Triple::mips: LibDirs.append(begin(MIPSLibDirs), end(MIPSLibDirs)); TripleAliases.append(begin(MIPSTriples), end(MIPSTriples)); BiarchLibDirs.append(begin(MIPS64LibDirs), end(MIPS64LibDirs)); BiarchTripleAliases.append(begin(MIPS64Triples), end(MIPS64Triples)); BiarchLibDirs.append(begin(MIPSN32LibDirs), end(MIPSN32LibDirs)); BiarchTripleAliases.append(begin(MIPSN32Triples), end(MIPSN32Triples)); break; case llvm::Triple::mipsel: LibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs)); TripleAliases.append(begin(MIPSELTriples), end(MIPSELTriples)); TripleAliases.append(begin(MIPSTriples), end(MIPSTriples)); BiarchLibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs)); BiarchTripleAliases.append(begin(MIPS64ELTriples), end(MIPS64ELTriples)); BiarchLibDirs.append(begin(MIPSN32ELLibDirs), end(MIPSN32ELLibDirs)); BiarchTripleAliases.append(begin(MIPSN32ELTriples), end(MIPSN32ELTriples)); break; case llvm::Triple::mips64: LibDirs.append(begin(MIPS64LibDirs), end(MIPS64LibDirs)); TripleAliases.append(begin(MIPS64Triples), end(MIPS64Triples)); BiarchLibDirs.append(begin(MIPSLibDirs), end(MIPSLibDirs)); BiarchTripleAliases.append(begin(MIPSTriples), end(MIPSTriples)); BiarchLibDirs.append(begin(MIPSN32LibDirs), end(MIPSN32LibDirs)); BiarchTripleAliases.append(begin(MIPSN32Triples), end(MIPSN32Triples)); break; case llvm::Triple::mips64el: LibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs)); TripleAliases.append(begin(MIPS64ELTriples), end(MIPS64ELTriples)); BiarchLibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs)); BiarchTripleAliases.append(begin(MIPSELTriples), end(MIPSELTriples)); BiarchLibDirs.append(begin(MIPSN32ELLibDirs), end(MIPSN32ELLibDirs)); BiarchTripleAliases.append(begin(MIPSN32ELTriples), end(MIPSN32ELTriples)); BiarchTripleAliases.append(begin(MIPSTriples), end(MIPSTriples)); break; case llvm::Triple::msp430: 
LibDirs.append(begin(MSP430LibDirs), end(MSP430LibDirs)); TripleAliases.append(begin(MSP430Triples), end(MSP430Triples)); break; case llvm::Triple::ppc: LibDirs.append(begin(PPCLibDirs), end(PPCLibDirs)); TripleAliases.append(begin(PPCTriples), end(PPCTriples)); BiarchLibDirs.append(begin(PPC64LibDirs), end(PPC64LibDirs)); BiarchTripleAliases.append(begin(PPC64Triples), end(PPC64Triples)); break; case llvm::Triple::ppcle: LibDirs.append(begin(PPCLELibDirs), end(PPCLELibDirs)); TripleAliases.append(begin(PPCLETriples), end(PPCLETriples)); BiarchLibDirs.append(begin(PPC64LELibDirs), end(PPC64LELibDirs)); BiarchTripleAliases.append(begin(PPC64LETriples), end(PPC64LETriples)); break; case llvm::Triple::ppc64: LibDirs.append(begin(PPC64LibDirs), end(PPC64LibDirs)); TripleAliases.append(begin(PPC64Triples), end(PPC64Triples)); BiarchLibDirs.append(begin(PPCLibDirs), end(PPCLibDirs)); BiarchTripleAliases.append(begin(PPCTriples), end(PPCTriples)); break; case llvm::Triple::ppc64le: LibDirs.append(begin(PPC64LELibDirs), end(PPC64LELibDirs)); TripleAliases.append(begin(PPC64LETriples), end(PPC64LETriples)); BiarchLibDirs.append(begin(PPCLELibDirs), end(PPCLELibDirs)); BiarchTripleAliases.append(begin(PPCLETriples), end(PPCLETriples)); break; case llvm::Triple::riscv32: LibDirs.append(begin(RISCV32LibDirs), end(RISCV32LibDirs)); TripleAliases.append(begin(RISCV32Triples), end(RISCV32Triples)); BiarchLibDirs.append(begin(RISCV64LibDirs), end(RISCV64LibDirs)); BiarchTripleAliases.append(begin(RISCV64Triples), end(RISCV64Triples)); break; case llvm::Triple::riscv64: LibDirs.append(begin(RISCV64LibDirs), end(RISCV64LibDirs)); TripleAliases.append(begin(RISCV64Triples), end(RISCV64Triples)); BiarchLibDirs.append(begin(RISCV32LibDirs), end(RISCV32LibDirs)); BiarchTripleAliases.append(begin(RISCV32Triples), end(RISCV32Triples)); break; case llvm::Triple::sparc: case llvm::Triple::sparcel: LibDirs.append(begin(SPARCv8LibDirs), end(SPARCv8LibDirs)); TripleAliases.append(begin(SPARCv8Triples), end(SPARCv8Triples)); BiarchLibDirs.append(begin(SPARCv9LibDirs), end(SPARCv9LibDirs)); BiarchTripleAliases.append(begin(SPARCv9Triples), end(SPARCv9Triples)); break; case llvm::Triple::sparcv9: LibDirs.append(begin(SPARCv9LibDirs), end(SPARCv9LibDirs)); TripleAliases.append(begin(SPARCv9Triples), end(SPARCv9Triples)); BiarchLibDirs.append(begin(SPARCv8LibDirs), end(SPARCv8LibDirs)); BiarchTripleAliases.append(begin(SPARCv8Triples), end(SPARCv8Triples)); break; case llvm::Triple::systemz: LibDirs.append(begin(SystemZLibDirs), end(SystemZLibDirs)); TripleAliases.append(begin(SystemZTriples), end(SystemZTriples)); break; default: // By default, just rely on the standard lib directories and the original // triple. break; } // Also include the multiarch variant if it's different. if (TargetTriple.str() != BiarchTriple.str()) BiarchTripleAliases.push_back(BiarchTriple.str()); } bool Generic_GCC::GCCInstallationDetector::ScanGCCForMultilibs( const llvm::Triple &TargetTriple, const ArgList &Args, StringRef Path, bool NeedsBiarchSuffix) { llvm::Triple::ArchType TargetArch = TargetTriple.getArch(); DetectedMultilibs Detected; // Android standalone toolchain could have multilibs for ARM and Thumb. // Debian mips multilibs behave more like the rest of the biarch ones, // so handle them there if (isArmOrThumbArch(TargetArch) && TargetTriple.isAndroid()) { // It should also work without multilibs in a simplified toolchain. 
findAndroidArmMultilibs(D, TargetTriple, Path, Args, Detected); } else if (TargetTriple.isCSKY()) { findCSKYMultilibs(D, TargetTriple, Path, Args, Detected); } else if (TargetTriple.isMIPS()) { if (!findMIPSMultilibs(D, TargetTriple, Path, Args, Detected)) return false; } else if (TargetTriple.isRISCV()) { findRISCVMultilibs(D, TargetTriple, Path, Args, Detected); } else if (isMSP430(TargetArch)) { findMSP430Multilibs(D, TargetTriple, Path, Args, Detected); } else if (TargetArch == llvm::Triple::avr) { // AVR has no multilibs. } else if (!findBiarchMultilibs(D, TargetTriple, Path, Args, NeedsBiarchSuffix, Detected)) { return false; } Multilibs = Detected.Multilibs; SelectedMultilib = Detected.SelectedMultilibs.empty() ? Multilib() : Detected.SelectedMultilibs.back(); BiarchSibling = Detected.BiarchSibling; return true; } void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple( const llvm::Triple &TargetTriple, const ArgList &Args, const std::string &LibDir, StringRef CandidateTriple, bool NeedsBiarchSuffix, bool GCCDirExists, bool GCCCrossDirExists) { // Locations relative to the system lib directory where GCC's triple-specific // directories might reside. struct GCCLibSuffix { // Path from system lib directory to GCC triple-specific directory. std::string LibSuffix; // Path from GCC triple-specific directory back to system lib directory. // This is one '..' component per component in LibSuffix. StringRef ReversePath; // Whether this library suffix is relevant for the triple. bool Active; } Suffixes[] = { // This is the normal place. {"gcc/" + CandidateTriple.str(), "../..", GCCDirExists}, // Debian puts cross-compilers in gcc-cross. {"gcc-cross/" + CandidateTriple.str(), "../..", GCCCrossDirExists}, // The Freescale PPC SDK has the gcc libraries in // /usr/lib//x.y.z so have a look there as well. Only do // this on Freescale triples, though, since some systems put a *lot* of // files in that location, not just GCC installation data. {CandidateTriple.str(), "..", TargetTriple.getVendor() == llvm::Triple::Freescale || TargetTriple.getVendor() == llvm::Triple::OpenEmbedded}}; for (auto &Suffix : Suffixes) { if (!Suffix.Active) continue; StringRef LibSuffix = Suffix.LibSuffix; std::error_code EC; for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(LibDir + "/" + LibSuffix, EC), LE; !EC && LI != LE; LI = LI.increment(EC)) { StringRef VersionText = llvm::sys::path::filename(LI->path()); GCCVersion CandidateVersion = GCCVersion::Parse(VersionText); if (CandidateVersion.Major != -1) // Filter obviously bad entries. if (!CandidateGCCInstallPaths.insert(std::string(LI->path())).second) continue; // Saw this path before; no need to look at it again. if (CandidateVersion.isOlderThan(4, 1, 1)) continue; if (CandidateVersion <= Version) continue; if (!ScanGCCForMultilibs(TargetTriple, Args, LI->path(), NeedsBiarchSuffix)) continue; Version = CandidateVersion; GCCTriple.setTriple(CandidateTriple); // FIXME: We hack together the directory name here instead of // using LI to ensure stable path separators across Windows and // Linux. 
GCCInstallPath = (LibDir + "/" + LibSuffix + "/" + VersionText).str(); GCCParentLibPath = (GCCInstallPath + "/../" + Suffix.ReversePath).str(); IsValid = true; } } } bool Generic_GCC::GCCInstallationDetector::ScanGentooConfigs( const llvm::Triple &TargetTriple, const ArgList &Args, const SmallVectorImpl &CandidateTriples, const SmallVectorImpl &CandidateBiarchTriples) { if (!D.getVFS().exists(concat(D.SysRoot, GentooConfigDir))) return false; for (StringRef CandidateTriple : CandidateTriples) { if (ScanGentooGccConfig(TargetTriple, Args, CandidateTriple)) return true; } for (StringRef CandidateTriple : CandidateBiarchTriples) { if (ScanGentooGccConfig(TargetTriple, Args, CandidateTriple, true)) return true; } return false; } bool Generic_GCC::GCCInstallationDetector::ScanGentooGccConfig( const llvm::Triple &TargetTriple, const ArgList &Args, StringRef CandidateTriple, bool NeedsBiarchSuffix) { llvm::ErrorOr> File = D.getVFS().getBufferForFile(concat(D.SysRoot, GentooConfigDir, "/config-" + CandidateTriple.str())); if (File) { SmallVector Lines; File.get()->getBuffer().split(Lines, "\n"); for (StringRef Line : Lines) { Line = Line.trim(); // CURRENT=triple-version if (!Line.consume_front("CURRENT=")) continue; // Process the config file pointed to by CURRENT. llvm::ErrorOr> ConfigFile = D.getVFS().getBufferForFile( concat(D.SysRoot, GentooConfigDir, "/" + Line)); std::pair ActiveVersion = Line.rsplit('-'); // List of paths to scan for libraries. SmallVector GentooScanPaths; // Scan the Config file to find installed GCC libraries path. // Typical content of the GCC config file: // LDPATH="/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.x:/usr/lib/gcc/ // (continued from previous line) x86_64-pc-linux-gnu/4.9.x/32" // MANPATH="/usr/share/gcc-data/x86_64-pc-linux-gnu/4.9.x/man" // INFOPATH="/usr/share/gcc-data/x86_64-pc-linux-gnu/4.9.x/info" // STDCXX_INCDIR="/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.x/include/g++-v4" // We are looking for the paths listed in LDPATH=... . if (ConfigFile) { SmallVector ConfigLines; ConfigFile.get()->getBuffer().split(ConfigLines, "\n"); for (StringRef ConfLine : ConfigLines) { ConfLine = ConfLine.trim(); if (ConfLine.consume_front("LDPATH=")) { // Drop '"' from front and back if present. ConfLine.consume_back("\""); ConfLine.consume_front("\""); // Get all paths sperated by ':' ConfLine.split(GentooScanPaths, ':', -1, /*AllowEmpty*/ false); } } } // Test the path based on the version in /etc/env.d/gcc/config-{tuple}. std::string basePath = "/usr/lib/gcc/" + ActiveVersion.first.str() + "/" + ActiveVersion.second.str(); GentooScanPaths.push_back(StringRef(basePath)); // Scan all paths for GCC libraries. 
for (const auto &GentooScanPath : GentooScanPaths) { std::string GentooPath = concat(D.SysRoot, GentooScanPath); if (D.getVFS().exists(GentooPath + "/crtbegin.o")) { if (!ScanGCCForMultilibs(TargetTriple, Args, GentooPath, NeedsBiarchSuffix)) continue; Version = GCCVersion::Parse(ActiveVersion.second); GCCInstallPath = GentooPath; GCCParentLibPath = GentooPath + std::string("/../../.."); GCCTriple.setTriple(ActiveVersion.first); IsValid = true; return true; } } } } return false; } Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : ToolChain(D, Triple, Args), GCCInstallation(D), CudaInstallation(D, Triple, Args), RocmInstallation(D, Triple, Args) { getProgramPaths().push_back(getDriver().Dir); } Generic_GCC::~Generic_GCC() {} Tool *Generic_GCC::getTool(Action::ActionClass AC) const { switch (AC) { case Action::PreprocessJobClass: if (!Preprocess) Preprocess.reset(new clang::driver::tools::gcc::Preprocessor(*this)); return Preprocess.get(); case Action::CompileJobClass: if (!Compile) Compile.reset(new tools::gcc::Compiler(*this)); return Compile.get(); default: return ToolChain::getTool(AC); } } Tool *Generic_GCC::buildAssembler() const { return new tools::gnutools::Assembler(*this); } Tool *Generic_GCC::buildLinker() const { return new tools::gcc::Linker(*this); } void Generic_GCC::printVerboseInfo(raw_ostream &OS) const { // Print the information about how we detected the GCC installation. GCCInstallation.print(OS); CudaInstallation->print(OS); RocmInstallation->print(OS); } ToolChain::UnwindTableLevel Generic_GCC::getDefaultUnwindTableLevel(const ArgList &Args) const { switch (getArch()) { case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::ppc: case llvm::Triple::ppcle: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: case llvm::Triple::riscv32: case llvm::Triple::riscv64: case llvm::Triple::x86: case llvm::Triple::x86_64: return UnwindTableLevel::Asynchronous; default: return UnwindTableLevel::None; } } bool Generic_GCC::isPICDefault() const { switch (getArch()) { case llvm::Triple::x86_64: return getTriple().isOSWindows(); case llvm::Triple::mips64: case llvm::Triple::mips64el: return true; default: return false; } } bool Generic_GCC::isPIEDefault(const llvm::opt::ArgList &Args) const { return false; } bool Generic_GCC::isPICDefaultForced() const { return getArch() == llvm::Triple::x86_64 && getTriple().isOSWindows(); } bool Generic_GCC::IsIntegratedAssemblerDefault() const { switch (getTriple().getArch()) { case llvm::Triple::nvptx: case llvm::Triple::nvptx64: case llvm::Triple::xcore: return false; default: return true; } } void Generic_GCC::PushPPaths(ToolChain::path_list &PPaths) { // Cross-compiling binutils and GCC installations (vanilla and openSUSE at // least) put various tools in a triple-prefixed directory off of the parent // of the GCC installation. We use the GCC triple here to ensure that we end // up with tools that support the same amount of cross compiling as the // detected GCC installation. For example, if we find a GCC installation // targeting x86_64, but it is a bi-arch GCC installation, it can also be // used to target i386. 
if (GCCInstallation.isValid()) { PPaths.push_back(Twine(GCCInstallation.getParentLibPath() + "/../" + GCCInstallation.getTriple().str() + "/bin") .str()); } } void Generic_GCC::AddMultilibPaths(const Driver &D, const std::string &SysRoot, const std::string &OSLibDir, const std::string &MultiarchTriple, path_list &Paths) { // Add the multilib suffixed paths where they are available. if (GCCInstallation.isValid()) { assert(!SelectedMultilibs.empty()); const llvm::Triple &GCCTriple = GCCInstallation.getTriple(); const std::string &LibPath = std::string(GCCInstallation.getParentLibPath()); // Sourcery CodeBench MIPS toolchain holds some libraries under // a biarch-like suffix of the GCC installation. if (const auto &PathsCallback = Multilibs.filePathsCallback()) for (const auto &Path : PathsCallback(SelectedMultilibs.back())) addPathIfExists(D, GCCInstallation.getInstallPath() + Path, Paths); // Add lib/gcc/$triple/$version, with an optional /multilib suffix. addPathIfExists(D, GCCInstallation.getInstallPath() + SelectedMultilibs.back().gccSuffix(), Paths); // Add lib/gcc/$triple/$libdir // For GCC built with --enable-version-specific-runtime-libs. addPathIfExists(D, GCCInstallation.getInstallPath() + "/../" + OSLibDir, Paths); // GCC cross compiling toolchains will install target libraries which ship // as part of the toolchain under // rather than as // any part of the GCC installation in // //gcc//. This decision is somewhat // debatable, but is the reality today. We need to search this tree even // when we have a sysroot somewhere else. It is the responsibility of // whomever is doing the cross build targeting a sysroot using a GCC // installation that is *not* within the system root to ensure two things: // // 1) Any DSOs that are linked in from this tree or from the install path // above must be present on the system root and found via an // appropriate rpath. // 2) There must not be libraries installed into // // unless they should be preferred over // those within the system root. // // Note that this matches the GCC behavior. See the below comment for where // Clang diverges from GCC's behavior. addPathIfExists(D, LibPath + "/../" + GCCTriple.str() + "/lib/../" + OSLibDir + SelectedMultilibs.back().osSuffix(), Paths); // If the GCC installation we found is inside of the sysroot, we want to // prefer libraries installed in the parent prefix of the GCC installation. // It is important to *not* use these paths when the GCC installation is // outside of the system root as that can pick up unintended libraries. // This usually happens when there is an external cross compiler on the // host system, and a more minimal sysroot available that is the target of // the cross. Note that GCC does include some of these directories in some // configurations but this seems somewhere between questionable and simply // a bug. 
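// --- Worked example (hypothetical paths, not part of the patch): for a GCC
// installation at /usr/lib/gcc/x86_64-linux-gnu/12 with OSLibDir "lib64" and
// an empty multilib suffix, the concatenations above produce candidates like
// the ones below; each is only kept if addPathIfExists finds it on disk:
#include <iostream>
#include <string>

int main() {
  const std::string InstallPath = "/usr/lib/gcc/x86_64-linux-gnu/12";
  const std::string LibPath = "/usr/lib"; // simplified GCC parent lib path
  const std::string Triple = "x86_64-linux-gnu";
  const std::string OSLibDir = "lib64";

  // lib/gcc/$triple/$version (plus the empty multilib gcc suffix)
  std::cout << InstallPath << '\n';
  // --enable-version-specific-runtime-libs layout
  std::cout << InstallPath + "/../" + OSLibDir << '\n';
  // target libraries shipped next to a cross toolchain
  std::cout << LibPath + "/../" + Triple + "/lib/../" + OSLibDir << '\n';
  // parent prefix, only when the installation sits inside the sysroot
  std::cout << LibPath + "/../" + OSLibDir << '\n';
}
// --- end aside ---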
if (StringRef(LibPath).starts_with(SysRoot)) addPathIfExists(D, LibPath + "/../" + OSLibDir, Paths); } } void Generic_GCC::AddMultiarchPaths(const Driver &D, const std::string &SysRoot, const std::string &OSLibDir, path_list &Paths) { if (GCCInstallation.isValid()) { const std::string &LibPath = std::string(GCCInstallation.getParentLibPath()); const llvm::Triple &GCCTriple = GCCInstallation.getTriple(); const Multilib &Multilib = GCCInstallation.getMultilib(); addPathIfExists( D, LibPath + "/../" + GCCTriple.str() + "/lib" + Multilib.osSuffix(), Paths); } } void Generic_GCC::AddMultilibIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { // Add include directories specific to the selected multilib set and multilib. if (!GCCInstallation.isValid()) return; // gcc TOOL_INCLUDE_DIR. const llvm::Triple &GCCTriple = GCCInstallation.getTriple(); std::string LibPath(GCCInstallation.getParentLibPath()); addSystemInclude(DriverArgs, CC1Args, Twine(LibPath) + "/../" + GCCTriple.str() + "/include"); const auto &Callback = Multilibs.includeDirsCallback(); if (Callback) { for (const auto &Path : Callback(GCCInstallation.getMultilib())) addExternCSystemIncludeIfExists(DriverArgs, CC1Args, GCCInstallation.getInstallPath() + Path); } } void Generic_GCC::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { if (DriverArgs.hasArg(options::OPT_nostdinc, options::OPT_nostdincxx, options::OPT_nostdlibinc)) return; switch (GetCXXStdlibType(DriverArgs)) { case ToolChain::CST_Libcxx: addLibCxxIncludePaths(DriverArgs, CC1Args); break; case ToolChain::CST_Libstdcxx: addLibStdCxxIncludePaths(DriverArgs, CC1Args); break; } } void Generic_GCC::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const { const Driver &D = getDriver(); std::string SysRoot = computeSysRoot(); if (SysRoot.empty()) SysRoot = llvm::sys::path::get_separator(); auto AddIncludePath = [&](StringRef Path, bool TargetDirRequired = false) { std::string Version = detectLibcxxVersion(Path); if (Version.empty()) return false; // First add the per-target include path if it exists. bool TargetDirExists = false; std::optional TargetIncludeDir = getTargetSubDirPath(Path); if (TargetIncludeDir) { SmallString<128> TargetDir(*TargetIncludeDir); llvm::sys::path::append(TargetDir, "c++", Version); if (D.getVFS().exists(TargetDir)) { addSystemInclude(DriverArgs, CC1Args, TargetDir); TargetDirExists = true; } } if (TargetDirRequired && !TargetDirExists) return false; // Second add the generic one. SmallString<128> GenericDir(Path); llvm::sys::path::append(GenericDir, "c++", Version); addSystemInclude(DriverArgs, CC1Args, GenericDir); return true; }; // Android only uses the libc++ headers installed alongside the toolchain if // they contain an Android-specific target include path, otherwise they're // incompatible with the NDK libraries. 
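// --- Illustrative aside (hypothetical toolchain layout, not the driver
// code): the AddIncludePath lambda above prefers a per-target libc++
// directory and then always adds the generic one, e.g. with a detected
// libc++ version directory "v1":
#include <iostream>
#include <string>

int main() {
  const std::string Base = "/opt/llvm/bin/../include";   // hypothetical
  const std::string Target = "x86_64-unknown-linux-gnu"; // hypothetical triple
  const std::string Version = "v1";                      // detectLibcxxVersion()

  // Checked first; added only if it exists (and required on Android):
  std::cout << Base + "/" + Target + "/c++/" + Version << '\n';
  // Added afterwards as the generic include directory:
  std::cout << Base + "/c++/" + Version << '\n';
}
// --- end aside ---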
SmallString<128> DriverIncludeDir(getDriver().Dir); llvm::sys::path::append(DriverIncludeDir, "..", "include"); if (AddIncludePath(DriverIncludeDir, /*TargetDirRequired=*/getTriple().isAndroid())) return; // If this is a development, non-installed, clang, libcxx will // not be found at ../include/c++ but it likely to be found at // one of the following two locations: SmallString<128> UsrLocalIncludeDir(SysRoot); llvm::sys::path::append(UsrLocalIncludeDir, "usr", "local", "include"); if (AddIncludePath(UsrLocalIncludeDir)) return; SmallString<128> UsrIncludeDir(SysRoot); llvm::sys::path::append(UsrIncludeDir, "usr", "include"); if (AddIncludePath(UsrIncludeDir)) return; } bool Generic_GCC::addLibStdCXXIncludePaths(Twine IncludeDir, StringRef Triple, Twine IncludeSuffix, const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, bool DetectDebian) const { if (!getVFS().exists(IncludeDir)) return false; // Debian native gcc uses g++-multiarch-incdir.diff which uses // include/x86_64-linux-gnu/c++/10$IncludeSuffix instead of // include/c++/10/x86_64-linux-gnu$IncludeSuffix. std::string Dir = IncludeDir.str(); StringRef Include = llvm::sys::path::parent_path(llvm::sys::path::parent_path(Dir)); std::string Path = (Include + "/" + Triple + Dir.substr(Include.size()) + IncludeSuffix) .str(); if (DetectDebian && !getVFS().exists(Path)) return false; // GPLUSPLUS_INCLUDE_DIR addSystemInclude(DriverArgs, CC1Args, IncludeDir); // GPLUSPLUS_TOOL_INCLUDE_DIR. If Triple is not empty, add a target-dependent // include directory. if (DetectDebian) addSystemInclude(DriverArgs, CC1Args, Path); else if (!Triple.empty()) addSystemInclude(DriverArgs, CC1Args, IncludeDir + "/" + Triple + IncludeSuffix); // GPLUSPLUS_BACKWARD_INCLUDE_DIR addSystemInclude(DriverArgs, CC1Args, IncludeDir + "/backward"); return true; } bool Generic_GCC::addGCCLibStdCxxIncludePaths( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, StringRef DebianMultiarch) const { assert(GCCInstallation.isValid()); // By default, look for the C++ headers in an include directory adjacent to // the lib directory of the GCC installation. Note that this is expect to be // equivalent to '/usr/include/c++/X.Y' in almost all cases. StringRef LibDir = GCCInstallation.getParentLibPath(); StringRef InstallDir = GCCInstallation.getInstallPath(); StringRef TripleStr = GCCInstallation.getTriple().str(); const Multilib &Multilib = GCCInstallation.getMultilib(); const GCCVersion &Version = GCCInstallation.getVersion(); // Try /../$triple/include/c++/$version (gcc --print-multiarch is not empty). if (addLibStdCXXIncludePaths( LibDir.str() + "/../" + TripleStr + "/include/c++/" + Version.Text, TripleStr, Multilib.includeSuffix(), DriverArgs, CC1Args)) return true; // Try /gcc/$triple/$version/include/c++/ (gcc --print-multiarch is not // empty). Like above but for GCC built with // --enable-version-specific-runtime-libs. if (addLibStdCXXIncludePaths(LibDir.str() + "/gcc/" + TripleStr + "/" + Version.Text + "/include/c++/", TripleStr, Multilib.includeSuffix(), DriverArgs, CC1Args)) return true; // Detect Debian g++-multiarch-incdir.diff. if (addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text, DebianMultiarch, Multilib.includeSuffix(), DriverArgs, CC1Args, /*Debian=*/true)) return true; // Try /../include/c++/$version (gcc --print-multiarch is empty). 
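// --- Illustrative aside (not the driver's code): the DetectDebian check in
// addLibStdCXXIncludePaths rewrites the generic layout
//   <prefix>/include/c++/10
// into Debian's g++-multiarch-incdir.diff layout
//   <prefix>/include/x86_64-linux-gnu/c++/10
// by splicing the triple in right after the ".../include" component
// (multilib include suffix omitted here):
#include <iostream>
#include <string>

int main() {
  const std::string IncludeDir = "/usr/include/c++/10"; // hypothetical
  const std::string Triple = "x86_64-linux-gnu";
  // Equivalent of parent_path(parent_path(IncludeDir)) == "/usr/include".
  std::string::size_type Cut = IncludeDir.rfind('/', IncludeDir.rfind('/') - 1);
  const std::string Include = IncludeDir.substr(0, Cut);
  const std::string DebianDir =
      Include + "/" + Triple + IncludeDir.substr(Include.size());
  std::cout << DebianDir << '\n'; // /usr/include/x86_64-linux-gnu/c++/10
}
// --- end aside ---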
if (addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text, TripleStr, Multilib.includeSuffix(), DriverArgs, CC1Args)) return true; // Otherwise, fall back on a bunch of options which don't use multiarch // layouts for simplicity. const std::string LibStdCXXIncludePathCandidates[] = { // Gentoo is weird and places its headers inside the GCC install, // so if the first attempt to find the headers fails, try these patterns. InstallDir.str() + "/include/g++-v" + Version.Text, InstallDir.str() + "/include/g++-v" + Version.MajorStr + "." + Version.MinorStr, InstallDir.str() + "/include/g++-v" + Version.MajorStr, }; for (const auto &IncludePath : LibStdCXXIncludePathCandidates) { if (addLibStdCXXIncludePaths(IncludePath, TripleStr, Multilib.includeSuffix(), DriverArgs, CC1Args)) return true; } return false; } void Generic_GCC::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const { if (GCCInstallation.isValid()) { addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args, GCCInstallation.getTriple().str()); } } llvm::opt::DerivedArgList * Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef, Action::OffloadKind DeviceOffloadKind) const { // If this tool chain is used for an OpenMP offloading device we have to make // sure we always generate a shared library regardless of the commands the // user passed to the host. This is required because the runtime library // is required to load the device image dynamically at run time. if (DeviceOffloadKind == Action::OFK_OpenMP) { DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); const OptTable &Opts = getDriver().getOpts(); // Request the shared library. Given that these options are decided // implicitly, they do not refer to any base argument. DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_shared)); DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_fPIC)); // Filter all the arguments we don't care passing to the offloading // toolchain as they can mess up with the creation of a shared library. for (auto *A : Args) { switch ((options::ID)A->getOption().getID()) { default: DAL->append(A); break; case options::OPT_shared: case options::OPT_dynamic: case options::OPT_static: case options::OPT_fPIC: case options::OPT_fno_PIC: case options::OPT_fpic: case options::OPT_fno_pic: case options::OPT_fPIE: case options::OPT_fno_PIE: case options::OPT_fpie: case options::OPT_fno_pie: break; } } return DAL; } return nullptr; } void Generic_ELF::anchor() {} void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs, ArgStringList &CC1Args, Action::OffloadKind) const { if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, options::OPT_fno_use_init_array, true)) CC1Args.push_back("-fno-use-init-array"); } diff --git a/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.cpp b/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.cpp index 2265138edbff..35bf39069605 100644 --- a/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.cpp +++ b/contrib/llvm-project/clang/lib/Driver/ToolChains/Linux.cpp @@ -1,856 +1,857 @@ //===--- Linux.h - Linux ToolChain Implementations --------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "Linux.h" #include "Arch/ARM.h" #include "Arch/LoongArch.h" #include "Arch/Mips.h" #include "Arch/PPC.h" #include "Arch/RISCV.h" #include "CommonArgs.h" #include "clang/Config/config.h" #include "clang/Driver/Distro.h" #include "clang/Driver/Driver.h" #include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" #include "llvm/Option/ArgList.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/VirtualFileSystem.h" #include using namespace clang::driver; using namespace clang::driver::toolchains; using namespace clang; using namespace llvm::opt; using tools::addPathIfExists; /// Get our best guess at the multiarch triple for a target. /// /// Debian-based systems are starting to use a multiarch setup where they use /// a target-triple directory in the library and header search paths. /// Unfortunately, this triple does not align with the vanilla target triple, /// so we provide a rough mapping here. std::string Linux::getMultiarchTriple(const Driver &D, const llvm::Triple &TargetTriple, StringRef SysRoot) const { llvm::Triple::EnvironmentType TargetEnvironment = TargetTriple.getEnvironment(); bool IsAndroid = TargetTriple.isAndroid(); bool IsMipsR6 = TargetTriple.getSubArch() == llvm::Triple::MipsSubArch_r6; bool IsMipsN32Abi = TargetTriple.getEnvironment() == llvm::Triple::GNUABIN32; // For most architectures, just use whatever we have rather than trying to be // clever. switch (TargetTriple.getArch()) { default: break; // We use the existence of '/lib/' as a directory to detect some // common linux triples that don't quite match the Clang triple for both // 32-bit and 64-bit targets. Multiarch fixes its install triples to these // regardless of what the actual target triple is. 
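// --- Illustrative aside (a reduced model, not the full switch that
// follows): the multiarch triple is a fixed per-architecture spelling rather
// than the vanilla target triple, for example:
#include <iostream>
#include <string>

static std::string multiarchFor(const std::string &Arch,
                                const std::string &Env, bool Android) {
  if (Arch == "arm")
    return Android              ? "arm-linux-androideabi"
           : Env == "gnueabihf" ? "arm-linux-gnueabihf"
                                : "arm-linux-gnueabi";
  if (Arch == "x86")
    return Android ? "i686-linux-android" : "i386-linux-gnu";
  if (Arch == "x86_64")
    return Android           ? "x86_64-linux-android"
           : Env == "gnux32" ? "x86_64-linux-gnux32"
                             : "x86_64-linux-gnu";
  return Arch + "-linux-gnu"; // rough default; many cases omitted
}

int main() {
  std::cout << multiarchFor("arm", "gnueabihf", false) << '\n';
  std::cout << multiarchFor("x86_64", "gnu", true) << '\n';
}
// --- end aside ---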
case llvm::Triple::arm: case llvm::Triple::thumb: if (IsAndroid) return "arm-linux-androideabi"; if (TargetEnvironment == llvm::Triple::GNUEABIHF || TargetEnvironment == llvm::Triple::MuslEABIHF || TargetEnvironment == llvm::Triple::EABIHF) return "arm-linux-gnueabihf"; return "arm-linux-gnueabi"; case llvm::Triple::armeb: case llvm::Triple::thumbeb: if (TargetEnvironment == llvm::Triple::GNUEABIHF || TargetEnvironment == llvm::Triple::MuslEABIHF || TargetEnvironment == llvm::Triple::EABIHF) return "armeb-linux-gnueabihf"; return "armeb-linux-gnueabi"; case llvm::Triple::x86: if (IsAndroid) return "i686-linux-android"; return "i386-linux-gnu"; case llvm::Triple::x86_64: if (IsAndroid) return "x86_64-linux-android"; if (TargetEnvironment == llvm::Triple::GNUX32) return "x86_64-linux-gnux32"; return "x86_64-linux-gnu"; case llvm::Triple::aarch64: if (IsAndroid) return "aarch64-linux-android"; if (hasEffectiveTriple() && getEffectiveTriple().getEnvironment() == llvm::Triple::PAuthTest) return "aarch64-linux-pauthtest"; return "aarch64-linux-gnu"; case llvm::Triple::aarch64_be: return "aarch64_be-linux-gnu"; case llvm::Triple::loongarch64: { const char *Libc; const char *FPFlavor; if (TargetTriple.isGNUEnvironment()) { Libc = "gnu"; } else if (TargetTriple.isMusl()) { Libc = "musl"; } else { return TargetTriple.str(); } switch (TargetEnvironment) { default: return TargetTriple.str(); case llvm::Triple::GNUSF: FPFlavor = "sf"; break; case llvm::Triple::GNUF32: FPFlavor = "f32"; break; case llvm::Triple::GNU: case llvm::Triple::GNUF64: // This was going to be "f64" in an earlier Toolchain Conventions // revision, but starting from Feb 2023 the F64 ABI variants are // unmarked in their canonical forms. FPFlavor = ""; break; } return (Twine("loongarch64-linux-") + Libc + FPFlavor).str(); } case llvm::Triple::m68k: return "m68k-linux-gnu"; case llvm::Triple::mips: return IsMipsR6 ? "mipsisa32r6-linux-gnu" : "mips-linux-gnu"; case llvm::Triple::mipsel: return IsMipsR6 ? "mipsisa32r6el-linux-gnu" : "mipsel-linux-gnu"; case llvm::Triple::mips64: { std::string MT = std::string(IsMipsR6 ? "mipsisa64r6" : "mips64") + "-linux-" + (IsMipsN32Abi ? "gnuabin32" : "gnuabi64"); if (D.getVFS().exists(concat(SysRoot, "/lib", MT))) return MT; if (D.getVFS().exists(concat(SysRoot, "/lib/mips64-linux-gnu"))) return "mips64-linux-gnu"; break; } case llvm::Triple::mips64el: { std::string MT = std::string(IsMipsR6 ? "mipsisa64r6el" : "mips64el") + "-linux-" + (IsMipsN32Abi ? 
"gnuabin32" : "gnuabi64"); if (D.getVFS().exists(concat(SysRoot, "/lib", MT))) return MT; if (D.getVFS().exists(concat(SysRoot, "/lib/mips64el-linux-gnu"))) return "mips64el-linux-gnu"; break; } case llvm::Triple::ppc: if (D.getVFS().exists(concat(SysRoot, "/lib/powerpc-linux-gnuspe"))) return "powerpc-linux-gnuspe"; return "powerpc-linux-gnu"; case llvm::Triple::ppcle: return "powerpcle-linux-gnu"; case llvm::Triple::ppc64: return "powerpc64-linux-gnu"; case llvm::Triple::ppc64le: return "powerpc64le-linux-gnu"; case llvm::Triple::riscv64: if (IsAndroid) return "riscv64-linux-android"; return "riscv64-linux-gnu"; case llvm::Triple::sparc: return "sparc-linux-gnu"; case llvm::Triple::sparcv9: return "sparc64-linux-gnu"; case llvm::Triple::systemz: return "s390x-linux-gnu"; } return TargetTriple.str(); } static StringRef getOSLibDir(const llvm::Triple &Triple, const ArgList &Args) { if (Triple.isMIPS()) { if (Triple.isAndroid()) { StringRef CPUName; StringRef ABIName; tools::mips::getMipsCPUAndABI(Args, Triple, CPUName, ABIName); if (CPUName == "mips32r6") return "libr6"; if (CPUName == "mips32r2") return "libr2"; } // lib32 directory has a special meaning on MIPS targets. // It contains N32 ABI binaries. Use this folder if produce // code for N32 ABI only. if (tools::mips::hasMipsAbiArg(Args, "n32")) return "lib32"; return Triple.isArch32Bit() ? "lib" : "lib64"; } // It happens that only x86, PPC and SPARC use the 'lib32' variant of // oslibdir, and using that variant while targeting other architectures causes // problems because the libraries are laid out in shared system roots that // can't cope with a 'lib32' library search path being considered. So we only // enable them when we know we may need it. // // FIXME: This is a bit of a hack. We should really unify this code for // reasoning about oslibdir spellings with the lib dir spellings in the // GCCInstallationDetector, but that is a more significant refactoring. if (Triple.getArch() == llvm::Triple::x86 || Triple.isPPC32() || Triple.getArch() == llvm::Triple::sparc) return "lib32"; if (Triple.getArch() == llvm::Triple::x86_64 && Triple.isX32()) return "libx32"; if (Triple.getArch() == llvm::Triple::riscv32) return "lib32"; return Triple.isArch32Bit() ? "lib" : "lib64"; } Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : Generic_ELF(D, Triple, Args) { GCCInstallation.init(Triple, Args); Multilibs = GCCInstallation.getMultilibs(); SelectedMultilibs.assign({GCCInstallation.getMultilib()}); llvm::Triple::ArchType Arch = Triple.getArch(); std::string SysRoot = computeSysRoot(); ToolChain::path_list &PPaths = getProgramPaths(); Generic_GCC::PushPPaths(PPaths); Distro Distro(D.getVFS(), Triple); if (Distro.IsAlpineLinux() || Triple.isAndroid()) { ExtraOpts.push_back("-z"); ExtraOpts.push_back("now"); } if (Distro.IsOpenSUSE() || Distro.IsUbuntu() || Distro.IsAlpineLinux() || Triple.isAndroid()) { ExtraOpts.push_back("-z"); ExtraOpts.push_back("relro"); } // Note, lld from 11 onwards default max-page-size to 65536 for both ARM and // AArch64. if (Triple.isAndroid()) { if (Triple.isARM()) { // Android ARM uses max-page-size=4096 to reduce VMA usage. ExtraOpts.push_back("-z"); ExtraOpts.push_back("max-page-size=4096"); } else if (Triple.isAArch64() || Triple.getArch() == llvm::Triple::x86_64) { // Android AArch64 uses max-page-size=16384 to support 4k/16k page sizes. // Android emulates a 16k page size for app testing on x86_64 machines. 
ExtraOpts.push_back("-z"); ExtraOpts.push_back("max-page-size=16384"); } } if (GCCInstallation.getParentLibPath().contains("opt/rh/")) // With devtoolset on RHEL, we want to add a bin directory that is relative // to the detected gcc install, because if we are using devtoolset gcc then // we want to use other tools from devtoolset (e.g. ld) instead of the // standard system tools. PPaths.push_back(Twine(GCCInstallation.getParentLibPath() + "/../bin").str()); if (Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb) ExtraOpts.push_back("-X"); const bool IsAndroid = Triple.isAndroid(); const bool IsMips = Triple.isMIPS(); const bool IsHexagon = Arch == llvm::Triple::hexagon; const bool IsRISCV = Triple.isRISCV(); const bool IsCSKY = Triple.isCSKY(); if (IsCSKY && !SelectedMultilibs.empty()) SysRoot = SysRoot + SelectedMultilibs.back().osSuffix(); if ((IsMips || IsCSKY) && !SysRoot.empty()) ExtraOpts.push_back("--sysroot=" + SysRoot); // Do not use 'gnu' hash style for Mips targets because .gnu.hash // and the MIPS ABI require .dynsym to be sorted in different ways. // .gnu.hash needs symbols to be grouped by hash code whereas the MIPS // ABI requires a mapping between the GOT and the symbol table. // Android loader does not support .gnu.hash until API 23. // Hexagon linker/loader does not support .gnu.hash if (!IsMips && !IsHexagon) { if (Distro.IsOpenSUSE() || Distro == Distro::UbuntuLucid || Distro == Distro::UbuntuJaunty || Distro == Distro::UbuntuKarmic || (IsAndroid && Triple.isAndroidVersionLT(23))) ExtraOpts.push_back("--hash-style=both"); else ExtraOpts.push_back("--hash-style=gnu"); } #ifdef ENABLE_LINKER_BUILD_ID ExtraOpts.push_back("--build-id"); #endif // The selection of paths to try here is designed to match the patterns which // the GCC driver itself uses, as this is part of the GCC-compatible driver. // This was determined by running GCC in a fake filesystem, creating all // possible permutations of these directories, and seeing which ones it added // to the link paths. path_list &Paths = getFilePaths(); const std::string OSLibDir = std::string(getOSLibDir(Triple, Args)); const std::string MultiarchTriple = getMultiarchTriple(D, Triple, SysRoot); // mips32: Debian multilib, we use /libo32, while in other case, /lib is // used. We need add both libo32 and /lib. if (Arch == llvm::Triple::mips || Arch == llvm::Triple::mipsel) { Generic_GCC::AddMultilibPaths(D, SysRoot, "libo32", MultiarchTriple, Paths); addPathIfExists(D, concat(SysRoot, "/libo32"), Paths); addPathIfExists(D, concat(SysRoot, "/usr/libo32"), Paths); } Generic_GCC::AddMultilibPaths(D, SysRoot, OSLibDir, MultiarchTriple, Paths); addPathIfExists(D, concat(SysRoot, "/lib", MultiarchTriple), Paths); addPathIfExists(D, concat(SysRoot, "/lib/..", OSLibDir), Paths); if (IsAndroid) { // Android sysroots contain a library directory for each supported OS // version as well as some unversioned libraries in the usual multiarch // directory. addPathIfExists( D, concat(SysRoot, "/usr/lib", MultiarchTriple, llvm::to_string(Triple.getEnvironmentVersion().getMajor())), Paths); } addPathIfExists(D, concat(SysRoot, "/usr/lib", MultiarchTriple), Paths); // 64-bit OpenEmbedded sysroots may not have a /usr/lib dir. So they cannot // find /usr/lib64 as it is referenced as /usr/lib/../lib64. So we handle // this here. 
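// --- Worked example (hypothetical Android AArch64 target, not part of the
// patch): the ExtraOpts accumulated above end up holding linker options
// roughly like this before they are forwarded to the link job:
#include <iostream>
#include <string>
#include <vector>

int main() {
  bool IsAndroid = true, IsARM = false, IsAArch64 = true, ApiLT23 = false;
  std::vector<std::string> ExtraOpts;
  if (IsAndroid) {
    ExtraOpts.insert(ExtraOpts.end(), {"-z", "now"});
    ExtraOpts.insert(ExtraOpts.end(), {"-z", "relro"});
    if (IsARM)
      ExtraOpts.insert(ExtraOpts.end(), {"-z", "max-page-size=4096"});
    else if (IsAArch64)
      ExtraOpts.insert(ExtraOpts.end(), {"-z", "max-page-size=16384"});
    ExtraOpts.push_back(ApiLT23 ? "--hash-style=both" : "--hash-style=gnu");
  }
  for (const auto &O : ExtraOpts)
    std::cout << O << ' ';
  std::cout << '\n'; // -z now -z relro -z max-page-size=16384 --hash-style=gnu
}
// --- end aside ---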
if (Triple.getVendor() == llvm::Triple::OpenEmbedded && Triple.isArch64Bit()) addPathIfExists(D, concat(SysRoot, "/usr", OSLibDir), Paths); else addPathIfExists(D, concat(SysRoot, "/usr/lib/..", OSLibDir), Paths); if (IsRISCV) { StringRef ABIName = tools::riscv::getRISCVABI(Args, Triple); addPathIfExists(D, concat(SysRoot, "/", OSLibDir, ABIName), Paths); addPathIfExists(D, concat(SysRoot, "/usr", OSLibDir, ABIName), Paths); } Generic_GCC::AddMultiarchPaths(D, SysRoot, OSLibDir, Paths); addPathIfExists(D, concat(SysRoot, "/lib"), Paths); addPathIfExists(D, concat(SysRoot, "/usr/lib"), Paths); } ToolChain::RuntimeLibType Linux::GetDefaultRuntimeLibType() const { if (getTriple().isAndroid()) return ToolChain::RLT_CompilerRT; return Generic_ELF::GetDefaultRuntimeLibType(); } unsigned Linux::GetDefaultDwarfVersion() const { if (getTriple().isAndroid()) return 4; return ToolChain::GetDefaultDwarfVersion(); } ToolChain::CXXStdlibType Linux::GetDefaultCXXStdlibType() const { if (getTriple().isAndroid()) return ToolChain::CST_Libcxx; return ToolChain::CST_Libstdcxx; } bool Linux::HasNativeLLVMSupport() const { return true; } Tool *Linux::buildLinker() const { return new tools::gnutools::Linker(*this); } Tool *Linux::buildStaticLibTool() const { return new tools::gnutools::StaticLibTool(*this); } Tool *Linux::buildAssembler() const { return new tools::gnutools::Assembler(*this); } std::string Linux::computeSysRoot() const { if (!getDriver().SysRoot.empty()) return getDriver().SysRoot; if (getTriple().isAndroid()) { // Android toolchains typically include a sysroot at ../sysroot relative to // the clang binary. const StringRef ClangDir = getDriver().Dir; std::string AndroidSysRootPath = (ClangDir + "/../sysroot").str(); if (getVFS().exists(AndroidSysRootPath)) return AndroidSysRootPath; } if (getTriple().isCSKY()) { // CSKY toolchains use different names for sysroot folder. if (!GCCInstallation.isValid()) return std::string(); // GCCInstallation.getInstallPath() = // $GCCToolchainPath/lib/gcc/csky-linux-gnuabiv2/6.3.0 // Path = $GCCToolchainPath/csky-linux-gnuabiv2/libc std::string Path = (GCCInstallation.getInstallPath() + "/../../../../" + GCCInstallation.getTriple().str() + "/libc") .str(); if (getVFS().exists(Path)) return Path; return std::string(); } if (!GCCInstallation.isValid() || !getTriple().isMIPS()) return std::string(); // Standalone MIPS toolchains use different names for sysroot folder // and put it into different places. Here we try to check some known // variants. const StringRef InstallDir = GCCInstallation.getInstallPath(); const StringRef TripleStr = GCCInstallation.getTriple().str(); const Multilib &Multilib = GCCInstallation.getMultilib(); std::string Path = (InstallDir + "/../../../../" + TripleStr + "/libc" + Multilib.osSuffix()) .str(); if (getVFS().exists(Path)) return Path; Path = (InstallDir + "/../../../../sysroot" + Multilib.osSuffix()).str(); if (getVFS().exists(Path)) return Path; return std::string(); } std::string Linux::getDynamicLinker(const ArgList &Args) const { const llvm::Triple::ArchType Arch = getArch(); const llvm::Triple &Triple = getTriple(); const Distro Distro(getDriver().getVFS(), Triple); if (Triple.isAndroid()) { if (getSanitizerArgs(Args).needsHwasanRt() && !Triple.isAndroidVersionLT(34) && Triple.isArch64Bit()) { // On Android 14 and newer, there is a special linker_hwasan64 that // allows to run HWASan binaries on non-HWASan system images. This // is also available on HWASan system images, so we can just always // use that instead. 
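// --- Illustrative aside (a reduced model of getDynamicLinker, not the real
// code): Android uses the system linker, musl composes the loader name from
// the architecture plus an optional "hf" suffix, and glibc pairs a lib
// directory with a per-architecture ld.so name. A few representative results:
#include <iostream>
#include <string>

static std::string muslLoader(std::string ArchName, bool HardFloatArm) {
  if (HardFloatArm)
    ArchName += "hf";
  return "/lib/ld-musl-" + ArchName + ".so.1";
}

int main() {
  std::cout << "/system/bin/linker64\n";            // Android, 64-bit
  std::cout << muslLoader("arm", true) << '\n';     // /lib/ld-musl-armhf.so.1
  std::cout << muslLoader("x86_64", false) << '\n'; // /lib/ld-musl-x86_64.so.1
  std::cout << "/lib64/ld-linux-x86-64.so.2\n";     // glibc x86_64
  std::cout << "/lib/ld-linux-aarch64.so.1\n";      // glibc aarch64
}
// --- end aside ---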
return "/system/bin/linker_hwasan64"; } return Triple.isArch64Bit() ? "/system/bin/linker64" : "/system/bin/linker"; } if (Triple.isMusl()) { std::string ArchName; bool IsArm = false; switch (Arch) { case llvm::Triple::arm: case llvm::Triple::thumb: ArchName = "arm"; IsArm = true; break; case llvm::Triple::armeb: case llvm::Triple::thumbeb: ArchName = "armeb"; IsArm = true; break; case llvm::Triple::x86: ArchName = "i386"; break; case llvm::Triple::x86_64: ArchName = Triple.isX32() ? "x32" : Triple.getArchName().str(); break; default: ArchName = Triple.getArchName().str(); } if (IsArm && (Triple.getEnvironment() == llvm::Triple::MuslEABIHF || tools::arm::getARMFloatABI(*this, Args) == tools::arm::FloatABI::Hard)) ArchName += "hf"; if (Arch == llvm::Triple::ppc && Triple.getSubArch() == llvm::Triple::PPCSubArch_spe) ArchName = "powerpc-sf"; return "/lib/ld-musl-" + ArchName + ".so.1"; } std::string LibDir; std::string Loader; switch (Arch) { default: llvm_unreachable("unsupported architecture"); case llvm::Triple::aarch64: LibDir = "lib"; Loader = "ld-linux-aarch64.so.1"; break; case llvm::Triple::aarch64_be: LibDir = "lib"; Loader = "ld-linux-aarch64_be.so.1"; break; case llvm::Triple::arm: case llvm::Triple::thumb: case llvm::Triple::armeb: case llvm::Triple::thumbeb: { const bool HF = Triple.getEnvironment() == llvm::Triple::GNUEABIHF || + Triple.getEnvironment() == llvm::Triple::GNUEABIHFT64 || tools::arm::getARMFloatABI(*this, Args) == tools::arm::FloatABI::Hard; LibDir = "lib"; Loader = HF ? "ld-linux-armhf.so.3" : "ld-linux.so.3"; break; } case llvm::Triple::loongarch32: { LibDir = "lib32"; Loader = ("ld-linux-loongarch-" + tools::loongarch::getLoongArchABI(getDriver(), Args, Triple) + ".so.1") .str(); break; } case llvm::Triple::loongarch64: { LibDir = "lib64"; Loader = ("ld-linux-loongarch-" + tools::loongarch::getLoongArchABI(getDriver(), Args, Triple) + ".so.1") .str(); break; } case llvm::Triple::m68k: LibDir = "lib"; Loader = "ld.so.1"; break; case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::mips64: case llvm::Triple::mips64el: { bool IsNaN2008 = tools::mips::isNaN2008(getDriver(), Args, Triple); LibDir = "lib" + tools::mips::getMipsABILibSuffix(Args, Triple); if (tools::mips::isUCLibc(Args)) Loader = IsNaN2008 ? "ld-uClibc-mipsn8.so.0" : "ld-uClibc.so.0"; else if (!Triple.hasEnvironment() && Triple.getVendor() == llvm::Triple::VendorType::MipsTechnologies) Loader = Triple.isLittleEndian() ? "ld-musl-mipsel.so.1" : "ld-musl-mips.so.1"; else Loader = IsNaN2008 ? "ld-linux-mipsn8.so.1" : "ld.so.1"; break; } case llvm::Triple::ppc: LibDir = "lib"; Loader = "ld.so.1"; break; case llvm::Triple::ppcle: LibDir = "lib"; Loader = "ld.so.1"; break; case llvm::Triple::ppc64: LibDir = "lib64"; Loader = (tools::ppc::hasPPCAbiArg(Args, "elfv2")) ? "ld64.so.2" : "ld64.so.1"; break; case llvm::Triple::ppc64le: LibDir = "lib64"; Loader = (tools::ppc::hasPPCAbiArg(Args, "elfv1")) ? 
"ld64.so.1" : "ld64.so.2"; break; case llvm::Triple::riscv32: case llvm::Triple::riscv64: { StringRef ArchName = llvm::Triple::getArchTypeName(Arch); StringRef ABIName = tools::riscv::getRISCVABI(Args, Triple); LibDir = "lib"; Loader = ("ld-linux-" + ArchName + "-" + ABIName + ".so.1").str(); break; } case llvm::Triple::sparc: case llvm::Triple::sparcel: LibDir = "lib"; Loader = "ld-linux.so.2"; break; case llvm::Triple::sparcv9: LibDir = "lib64"; Loader = "ld-linux.so.2"; break; case llvm::Triple::systemz: LibDir = "lib"; Loader = "ld64.so.1"; break; case llvm::Triple::x86: LibDir = "lib"; Loader = "ld-linux.so.2"; break; case llvm::Triple::x86_64: { bool X32 = Triple.isX32(); LibDir = X32 ? "libx32" : "lib64"; Loader = X32 ? "ld-linux-x32.so.2" : "ld-linux-x86-64.so.2"; break; } case llvm::Triple::ve: return "/opt/nec/ve/lib/ld-linux-ve.so.1"; case llvm::Triple::csky: { LibDir = "lib"; Loader = "ld.so.1"; break; } } if (Distro == Distro::Exherbo && (Triple.getVendor() == llvm::Triple::UnknownVendor || Triple.getVendor() == llvm::Triple::PC)) return "/usr/" + Triple.str() + "/lib/" + Loader; return "/" + LibDir + "/" + Loader; } void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { const Driver &D = getDriver(); std::string SysRoot = computeSysRoot(); if (DriverArgs.hasArg(clang::driver::options::OPT_nostdinc)) return; // Add 'include' in the resource directory, which is similar to // GCC_INCLUDE_DIR (private headers) in GCC. Note: the include directory // contains some files conflicting with system /usr/include. musl systems // prefer the /usr/include copies which are more relevant. SmallString<128> ResourceDirInclude(D.ResourceDir); llvm::sys::path::append(ResourceDirInclude, "include"); if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && (!getTriple().isMusl() || DriverArgs.hasArg(options::OPT_nostdlibinc))) addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude); if (DriverArgs.hasArg(options::OPT_nostdlibinc)) return; // LOCAL_INCLUDE_DIR addSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/local/include")); // TOOL_INCLUDE_DIR AddMultilibIncludeArgs(DriverArgs, CC1Args); // Check for configure-time C include directories. StringRef CIncludeDirs(C_INCLUDE_DIRS); if (CIncludeDirs != "") { SmallVector dirs; CIncludeDirs.split(dirs, ":"); for (StringRef dir : dirs) { StringRef Prefix = llvm::sys::path::is_absolute(dir) ? "" : StringRef(SysRoot); addExternCSystemInclude(DriverArgs, CC1Args, Prefix + dir); } return; } // On systems using multiarch and Android, add /usr/include/$triple before // /usr/include. std::string MultiarchIncludeDir = getMultiarchTriple(D, getTriple(), SysRoot); if (!MultiarchIncludeDir.empty() && D.getVFS().exists(concat(SysRoot, "/usr/include", MultiarchIncludeDir))) addExternCSystemInclude( DriverArgs, CC1Args, concat(SysRoot, "/usr/include", MultiarchIncludeDir)); if (getTriple().getOS() == llvm::Triple::RTEMS) return; // Add an include of '/include' directly. This isn't provided by default by // system GCCs, but is often used with cross-compiling GCCs, and harmless to // add even when Clang is acting as-if it were a system compiler. 
addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/include")); addExternCSystemInclude(DriverArgs, CC1Args, concat(SysRoot, "/usr/include")); if (!DriverArgs.hasArg(options::OPT_nobuiltininc) && getTriple().isMusl()) addSystemInclude(DriverArgs, CC1Args, ResourceDirInclude); } void Linux::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const { // We need a detected GCC installation on Linux to provide libstdc++'s // headers in odd Linuxish places. if (!GCCInstallation.isValid()) return; // Detect Debian g++-multiarch-incdir.diff. StringRef TripleStr = GCCInstallation.getTriple().str(); StringRef DebianMultiarch = GCCInstallation.getTriple().getArch() == llvm::Triple::x86 ? "i386-linux-gnu" : TripleStr; // Try generic GCC detection first. if (Generic_GCC::addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args, DebianMultiarch)) return; StringRef LibDir = GCCInstallation.getParentLibPath(); const Multilib &Multilib = GCCInstallation.getMultilib(); const GCCVersion &Version = GCCInstallation.getVersion(); const std::string LibStdCXXIncludePathCandidates[] = { // Android standalone toolchain has C++ headers in yet another place. LibDir.str() + "/../" + TripleStr.str() + "/include/c++/" + Version.Text, // Freescale SDK C++ headers are directly in /usr/include/c++, // without a subdirectory corresponding to the gcc version. LibDir.str() + "/../include/c++", // Cray's gcc installation puts headers under "g++" without a // version suffix. LibDir.str() + "/../include/g++", }; for (const auto &IncludePath : LibStdCXXIncludePathCandidates) { if (addLibStdCXXIncludePaths(IncludePath, TripleStr, Multilib.includeSuffix(), DriverArgs, CC1Args)) break; } } void Linux::AddCudaIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { CudaInstallation->AddCudaIncludeArgs(DriverArgs, CC1Args); } void Linux::AddHIPIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { RocmInstallation->AddHIPIncludeArgs(DriverArgs, CC1Args); } void Linux::AddHIPRuntimeLibArgs(const ArgList &Args, ArgStringList &CmdArgs) const { CmdArgs.push_back( Args.MakeArgString(StringRef("-L") + RocmInstallation->getLibPath())); if (Args.hasFlag(options::OPT_frtlib_add_rpath, options::OPT_fno_rtlib_add_rpath, false)) CmdArgs.append( {"-rpath", Args.MakeArgString(RocmInstallation->getLibPath())}); CmdArgs.push_back("-lamdhip64"); } void Linux::AddIAMCUIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { if (GCCInstallation.isValid()) { CC1Args.push_back("-isystem"); CC1Args.push_back(DriverArgs.MakeArgString( GCCInstallation.getParentLibPath() + "/../" + GCCInstallation.getTriple().str() + "/include")); } } bool Linux::isPIEDefault(const llvm::opt::ArgList &Args) const { return CLANG_DEFAULT_PIE_ON_LINUX || getTriple().isAndroid() || getTriple().isMusl() || getSanitizerArgs(Args).requiresPIE(); } bool Linux::IsAArch64OutlineAtomicsDefault(const ArgList &Args) const { // Outline atomics for AArch64 are supported by compiler-rt // and libgcc since 9.3.1 assert(getTriple().isAArch64() && "expected AArch64 target!"); ToolChain::RuntimeLibType RtLib = GetRuntimeLibType(Args); if (RtLib == ToolChain::RLT_CompilerRT) return true; assert(RtLib == ToolChain::RLT_Libgcc && "unexpected runtime library type!"); if (GCCInstallation.getVersion().isOlderThan(9, 3, 1)) return false; return true; } bool Linux::IsMathErrnoDefault() const { if (getTriple().isAndroid() || getTriple().isMusl()) return false; return Generic_ELF::IsMathErrnoDefault(); } 
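// --- Illustrative aside (not the driver code): IsAArch64OutlineAtomicsDefault
// above reduces to "compiler-rt always provides the outline-atomics helpers;
// libgcc only from 9.3.1 onwards":
#include <iostream>
#include <tuple>

static bool outlineAtomicsDefault(bool UsingCompilerRT, int Major, int Minor,
                                  int Patch) {
  if (UsingCompilerRT)
    return true;
  // libgcc runtime: require version >= 9.3.1
  return std::make_tuple(Major, Minor, Patch) >= std::make_tuple(9, 3, 1);
}

int main() {
  std::cout << outlineAtomicsDefault(true, 0, 0, 0) << '\n';   // 1
  std::cout << outlineAtomicsDefault(false, 8, 5, 0) << '\n';  // 0
  std::cout << outlineAtomicsDefault(false, 12, 2, 0) << '\n'; // 1
}
// --- end aside ---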
SanitizerMask Linux::getSupportedSanitizers() const { const bool IsX86 = getTriple().getArch() == llvm::Triple::x86; const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64; const bool IsMIPS = getTriple().isMIPS32(); const bool IsMIPS64 = getTriple().isMIPS64(); const bool IsPowerPC64 = getTriple().getArch() == llvm::Triple::ppc64 || getTriple().getArch() == llvm::Triple::ppc64le; const bool IsAArch64 = getTriple().getArch() == llvm::Triple::aarch64 || getTriple().getArch() == llvm::Triple::aarch64_be; const bool IsArmArch = getTriple().getArch() == llvm::Triple::arm || getTriple().getArch() == llvm::Triple::thumb || getTriple().getArch() == llvm::Triple::armeb || getTriple().getArch() == llvm::Triple::thumbeb; const bool IsLoongArch64 = getTriple().getArch() == llvm::Triple::loongarch64; const bool IsRISCV64 = getTriple().getArch() == llvm::Triple::riscv64; const bool IsSystemZ = getTriple().getArch() == llvm::Triple::systemz; const bool IsHexagon = getTriple().getArch() == llvm::Triple::hexagon; SanitizerMask Res = ToolChain::getSupportedSanitizers(); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; Res |= SanitizerKind::Fuzzer; Res |= SanitizerKind::FuzzerNoLink; Res |= SanitizerKind::KernelAddress; Res |= SanitizerKind::Memory; Res |= SanitizerKind::Vptr; Res |= SanitizerKind::SafeStack; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsLoongArch64) Res |= SanitizerKind::DataFlow; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsArmArch || IsPowerPC64 || IsRISCV64 || IsSystemZ || IsHexagon || IsLoongArch64) Res |= SanitizerKind::Leak; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64 || IsSystemZ || IsLoongArch64 || IsRISCV64) Res |= SanitizerKind::Thread; if (IsX86_64 || IsSystemZ || IsPowerPC64) Res |= SanitizerKind::KernelMemory; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch || IsPowerPC64 || IsHexagon || IsLoongArch64 || IsRISCV64) Res |= SanitizerKind::Scudo; if (IsX86_64 || IsAArch64 || IsRISCV64) { Res |= SanitizerKind::HWAddress; } if (IsX86_64 || IsAArch64) { Res |= SanitizerKind::KernelHWAddress; } if (IsX86_64) Res |= SanitizerKind::NumericalStability; // Work around "Cannot represent a difference across sections". if (getTriple().getArch() == llvm::Triple::ppc64) Res &= ~SanitizerKind::Function; return Res; } void Linux::addProfileRTLibs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const { // Add linker option -u__llvm_profile_runtime to cause runtime // initialization module to be linked in. if (needsProfileRT(Args)) CmdArgs.push_back(Args.MakeArgString( Twine("-u", llvm::getInstrProfRuntimeHookVarName()))); ToolChain::addProfileRTLibs(Args, CmdArgs); } void Linux::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { for (const auto &Opt : ExtraOpts) CmdArgs.push_back(Opt.c_str()); } const char *Linux::getDefaultLinker() const { if (getTriple().isAndroid()) return "ld.lld"; return Generic_ELF::getDefaultLinker(); } diff --git a/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp index 631c7c62baac..a5268e153bcc 100644 --- a/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp +++ b/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp @@ -1,5056 +1,5061 @@ //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file contains the implementation of the UnwrappedLineParser, /// which turns a stream of tokens into UnwrappedLines. /// //===----------------------------------------------------------------------===// #include "UnwrappedLineParser.h" #include "FormatToken.h" #include "FormatTokenLexer.h" #include "FormatTokenSource.h" #include "Macros.h" #include "TokenAnnotator.h" #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_os_ostream.h" #include "llvm/Support/raw_ostream.h" #include #include #define DEBUG_TYPE "format-parser" namespace clang { namespace format { namespace { void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line, StringRef Prefix = "", bool PrintText = false) { OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn << ")" << (Line.InPPDirective ? " MACRO" : "") << ": "; bool NewLine = false; for (std::list::const_iterator I = Line.Tokens.begin(), E = Line.Tokens.end(); I != E; ++I) { if (NewLine) { OS << Prefix; NewLine = false; } OS << I->Tok->Tok.getName() << "[" << "T=" << (unsigned)I->Tok->getType() << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText << "\"] "; for (SmallVectorImpl::const_iterator CI = I->Children.begin(), CE = I->Children.end(); CI != CE; ++CI) { OS << "\n"; printLine(OS, *CI, (Prefix + " ").str()); NewLine = true; } } if (!NewLine) OS << "\n"; } LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) { printLine(llvm::dbgs(), Line); } class ScopedDeclarationState { public: ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, bool MustBeDeclaration) : Line(Line), Stack(Stack) { Line.MustBeDeclaration = MustBeDeclaration; Stack.push_back(MustBeDeclaration); } ~ScopedDeclarationState() { Stack.pop_back(); if (!Stack.empty()) Line.MustBeDeclaration = Stack.back(); else Line.MustBeDeclaration = true; } private: UnwrappedLine &Line; llvm::BitVector &Stack; }; } // end anonymous namespace std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) { llvm::raw_os_ostream OS(Stream); printLine(OS, Line); return Stream; } class ScopedLineState { public: ScopedLineState(UnwrappedLineParser &Parser, bool SwitchToPreprocessorLines = false) : Parser(Parser), OriginalLines(Parser.CurrentLines) { if (SwitchToPreprocessorLines) Parser.CurrentLines = &Parser.PreprocessorDirectives; else if (!Parser.Line->Tokens.empty()) Parser.CurrentLines = &Parser.Line->Tokens.back().Children; PreBlockLine = std::move(Parser.Line); Parser.Line = std::make_unique(); Parser.Line->Level = PreBlockLine->Level; Parser.Line->PPLevel = PreBlockLine->PPLevel; Parser.Line->InPPDirective = PreBlockLine->InPPDirective; Parser.Line->InMacroBody = PreBlockLine->InMacroBody; Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel; } ~ScopedLineState() { if (!Parser.Line->Tokens.empty()) Parser.addUnwrappedLine(); assert(Parser.Line->Tokens.empty()); Parser.Line = std::move(PreBlockLine); if (Parser.CurrentLines == &Parser.PreprocessorDirectives) Parser.MustBreakBeforeNextToken = true; Parser.CurrentLines = OriginalLines; } private: UnwrappedLineParser &Parser; std::unique_ptr PreBlockLine; SmallVectorImpl *OriginalLines; }; class CompoundStatementIndenter { public: 
CompoundStatementIndenter(UnwrappedLineParser *Parser, const FormatStyle &Style, unsigned &LineLevel) : CompoundStatementIndenter(Parser, LineLevel, Style.BraceWrapping.AfterControlStatement, Style.BraceWrapping.IndentBraces) {} CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, bool WrapBrace, bool IndentBrace) : LineLevel(LineLevel), OldLineLevel(LineLevel) { if (WrapBrace) Parser->addUnwrappedLine(); if (IndentBrace) ++LineLevel; } ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } private: unsigned &LineLevel; unsigned OldLineLevel; }; UnwrappedLineParser::UnwrappedLineParser( SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef Tokens, UnwrappedLineConsumer &Callback, llvm::SpecificBumpPtrAllocator &Allocator, IdentifierTable &IdentTable) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None ? IG_Rejected : IG_Inited), IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) { assert(IsCpp == LangOpts.CXXOperatorNames); } void UnwrappedLineParser::reset() { PPBranchLevel = -1; IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None ? IG_Rejected : IG_Inited; IncludeGuardToken = nullptr; Line.reset(new UnwrappedLine); CommentsBeforeNextToken.clear(); FormatTok = nullptr; MustBreakBeforeNextToken = false; IsDecltypeAutoFunction = false; PreprocessorDirectives.clear(); CurrentLines = &Lines; DeclarationScopeStack.clear(); NestedTooDeep.clear(); NestedLambdas.clear(); PPStack.clear(); Line->FirstStartColumn = FirstStartColumn; if (!Unexpanded.empty()) for (FormatToken *Token : AllTokens) Token->MacroCtx.reset(); CurrentExpandedLines.clear(); ExpandedLines.clear(); Unexpanded.clear(); InExpansion = false; Reconstruct.reset(); } void UnwrappedLineParser::parse() { IndexedTokenSource TokenSource(AllTokens); Line->FirstStartColumn = FirstStartColumn; do { LLVM_DEBUG(llvm::dbgs() << "----\n"); reset(); Tokens = &TokenSource; TokenSource.reset(); readToken(); parseFile(); // If we found an include guard then all preprocessor directives (other than // the guard) are over-indented by one. if (IncludeGuard == IG_Found) { for (auto &Line : Lines) if (Line.InPPDirective && Line.Level > 0) --Line.Level; } // Create line with eof token. assert(eof()); pushToken(FormatTok); addUnwrappedLine(); // In a first run, format everything with the lines containing macro calls // replaced by the expansion. 
if (!ExpandedLines.empty()) { LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); for (const auto &Line : Lines) { if (!Line.Tokens.empty()) { auto it = ExpandedLines.find(Line.Tokens.begin()->Tok); if (it != ExpandedLines.end()) { for (const auto &Expanded : it->second) { LLVM_DEBUG(printDebugInfo(Expanded)); Callback.consumeUnwrappedLine(Expanded); } continue; } } LLVM_DEBUG(printDebugInfo(Line)); Callback.consumeUnwrappedLine(Line); } Callback.finishRun(); } LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); for (const UnwrappedLine &Line : Lines) { LLVM_DEBUG(printDebugInfo(Line)); Callback.consumeUnwrappedLine(Line); } Callback.finishRun(); Lines.clear(); while (!PPLevelBranchIndex.empty() && PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); } if (!PPLevelBranchIndex.empty()) { ++PPLevelBranchIndex.back(); assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); } } while (!PPLevelBranchIndex.empty()); } void UnwrappedLineParser::parseFile() { // The top-level context in a file always has declarations, except for pre- // processor directives and JavaScript files. bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); if (Style.Language == FormatStyle::LK_TextProto) parseBracedList(); else parseLevel(); // Make sure to format the remaining tokens. // // LK_TextProto is special since its top-level is parsed as the body of a // braced list, which does not necessarily have natural line separators such // as a semicolon. Comments after the last entry that have been determined to // not belong to that line, as in: // key: value // // endfile comment // do not have a chance to be put on a line of their own until this point. // Here we add this newline before end-of-file comments. if (Style.Language == FormatStyle::LK_TextProto && !CommentsBeforeNextToken.empty()) { addUnwrappedLine(); } flushComments(true); addUnwrappedLine(); } void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { do { switch (FormatTok->Tok.getKind()) { case tok::l_brace: return; default: if (FormatTok->is(Keywords.kw_where)) { addUnwrappedLine(); nextToken(); parseCSharpGenericTypeConstraint(); break; } nextToken(); break; } } while (!eof()); } void UnwrappedLineParser::parseCSharpAttribute() { int UnpairedSquareBrackets = 1; do { switch (FormatTok->Tok.getKind()) { case tok::r_square: nextToken(); --UnpairedSquareBrackets; if (UnpairedSquareBrackets == 0) { addUnwrappedLine(); return; } break; case tok::l_square: ++UnpairedSquareBrackets; nextToken(); break; default: nextToken(); break; } } while (!eof()); } bool UnwrappedLineParser::precededByCommentOrPPDirective() const { if (!Lines.empty() && Lines.back().InPPDirective) return true; const FormatToken *Previous = Tokens->getPreviousToken(); return Previous && Previous->is(tok::comment) && (Previous->IsMultiline || Previous->NewlinesBefore > 0); } /// \brief Parses a level, that is ???. /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level. /// \param IfKind The \p if statement kind in the level. /// \param IfLeftBrace The left brace of the \p if block in the level. /// \returns true if a simple block of if/else/for/while, or false otherwise. /// (A simple block has a single statement.) 
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace, IfStmtKind *IfKind, FormatToken **IfLeftBrace) { const bool InRequiresExpression = OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); const bool IsPrecededByCommentOrPPDirective = !Style.RemoveBracesLLVM || precededByCommentOrPPDirective(); FormatToken *IfLBrace = nullptr; bool HasDoWhile = false; bool HasLabel = false; unsigned StatementCount = 0; bool SwitchLabelEncountered = false; do { if (FormatTok->isAttribute()) { nextToken(); if (FormatTok->is(tok::l_paren)) parseParens(); continue; } tok::TokenKind Kind = FormatTok->Tok.getKind(); if (FormatTok->is(TT_MacroBlockBegin)) Kind = tok::l_brace; else if (FormatTok->is(TT_MacroBlockEnd)) Kind = tok::r_brace; auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] { parseStructuralElement(OpeningBrace, IfKind, &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile, HasLabel ? nullptr : &HasLabel); ++StatementCount; assert(StatementCount > 0 && "StatementCount overflow!"); }; switch (Kind) { case tok::comment: nextToken(); addUnwrappedLine(); break; case tok::l_brace: if (InRequiresExpression) { FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); } else if (FormatTok->Previous && FormatTok->Previous->ClosesRequiresClause) { // We need the 'default' case here to correctly parse a function // l_brace. ParseDefault(); continue; } if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) { if (tryToParseBracedList()) continue; FormatTok->setFinalizedType(TT_BlockLBrace); } parseBlock(); ++StatementCount; assert(StatementCount > 0 && "StatementCount overflow!"); addUnwrappedLine(); break; case tok::r_brace: if (OpeningBrace) { if (!Style.RemoveBracesLLVM || Line->InPPDirective || !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) { return false; } if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel || HasDoWhile || IsPrecededByCommentOrPPDirective || precededByCommentOrPPDirective()) { return false; } const FormatToken *Next = Tokens->peekNextToken(); if (Next->is(tok::comment) && Next->NewlinesBefore == 0) return false; if (IfLeftBrace) *IfLeftBrace = IfLBrace; return true; } nextToken(); addUnwrappedLine(); break; case tok::kw_default: { unsigned StoredPosition = Tokens->getPosition(); auto *Next = Tokens->getNextNonComment(); FormatTok = Tokens->setPosition(StoredPosition); if (!Next->isOneOf(tok::colon, tok::arrow)) { // default not followed by `:` or `->` is not a case label; treat it // like an identifier. parseStructuralElement(); break; } // Else, if it is 'default:', fall through to the case handling. [[fallthrough]]; } case tok::kw_case: if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() || (Style.isJavaScript() && Line->MustBeDeclaration)) { // Proto: there are no switch/case statements // Verilog: Case labels don't have this word. We handle case // labels including default in TokenAnnotator. // JavaScript: A 'case: string' style field declaration. 
ParseDefault(); break; } if (!SwitchLabelEncountered && (Style.IndentCaseLabels || (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) || (Line->InPPDirective && Line->Level == 1))) { ++Line->Level; } SwitchLabelEncountered = true; parseStructuralElement(); break; case tok::l_square: if (Style.isCSharp()) { nextToken(); parseCSharpAttribute(); break; } if (handleCppAttributes()) break; [[fallthrough]]; default: ParseDefault(); break; } } while (!eof()); return false; } void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { // We'll parse forward through the tokens until we hit // a closing brace or eof - note that getNextToken() will // parse macros, so this will magically work inside macro // definitions, too. unsigned StoredPosition = Tokens->getPosition(); FormatToken *Tok = FormatTok; const FormatToken *PrevTok = Tok->Previous; // Keep a stack of positions of lbrace tokens. We will // update information about whether an lbrace starts a // braced init list or a different block during the loop. struct StackEntry { FormatToken *Tok; const FormatToken *PrevTok; }; SmallVector LBraceStack; assert(Tok->is(tok::l_brace)); do { auto *NextTok = Tokens->getNextNonComment(); if (!Line->InMacroBody && !Style.isTableGen()) { // Skip PPDirective lines and comments. while (NextTok->is(tok::hash)) { NextTok = Tokens->getNextToken(); if (NextTok->is(tok::pp_not_keyword)) break; do { NextTok = Tokens->getNextToken(); } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof)); while (NextTok->is(tok::comment)) NextTok = Tokens->getNextToken(); } } switch (Tok->Tok.getKind()) { case tok::l_brace: if (Style.isJavaScript() && PrevTok) { if (PrevTok->isOneOf(tok::colon, tok::less)) { // A ':' indicates this code is in a type, or a braced list // following a label in an object literal ({a: {b: 1}}). // A '<' could be an object used in a comparison, but that is nonsense // code (can never return true), so more likely it is a generic type // argument (`X<{a: string; b: number}>`). // The code below could be confused by semicolons between the // individual members in a type member list, which would normally // trigger BK_Block. In both cases, this must be parsed as an inline // braced init. Tok->setBlockKind(BK_BracedInit); } else if (PrevTok->is(tok::r_paren)) { // `) { }` can only occur in function or method declarations in JS. Tok->setBlockKind(BK_Block); } } else { Tok->setBlockKind(BK_Unknown); } LBraceStack.push_back({Tok, PrevTok}); break; case tok::r_brace: if (LBraceStack.empty()) break; if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) { bool ProbablyBracedList = false; if (Style.Language == FormatStyle::LK_Proto) { ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); } else if (LBrace->isNot(TT_EnumLBrace)) { // Using OriginalColumn to distinguish between ObjC methods and // binary operators is a bit hacky. bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && NextTok->OriginalColumn == 0; // Try to detect a braced list. Note that regardless how we mark inner // braces here, we will overwrite the BlockKind later if we parse a // braced list (where all blocks inside are by default braced lists), // or when we explicitly detect blocks (for example while parsing // lambdas). // If we already marked the opening brace as braced list, the closing // must also be part of it. 
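// --- Illustrative aside (a toy model of the heuristic applied below, not
// clang-format's code): the token that follows the closing brace is the main
// signal for deciding between a block and a braced initializer list:
#include <iostream>
#include <string>

enum class BraceKind { Block, BracedInit };

static BraceKind classify(const std::string &TokenAfterRBrace) {
  // "f(Foo{1, 2}, 3)" or "x[Foo{1}]": a comma, closing paren/bracket or a
  // binary operator right after '}' suggests a braced initializer list.
  if (TokenAfterRBrace == "," || TokenAfterRBrace == ")" ||
      TokenAfterRBrace == "]" || TokenAfterRBrace == "+")
    return BraceKind::BracedInit;
  // "} else", "} int f();": anything that starts a new statement or
  // declaration suggests a block.
  return BraceKind::Block;
}

int main() {
  std::cout << (classify(",") == BraceKind::BracedInit) << '\n'; // 1
  std::cout << (classify("if") == BraceKind::Block) << '\n';     // 1
}
// --- end aside ---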
ProbablyBracedList = LBrace->is(TT_BracedListLBrace); ProbablyBracedList = ProbablyBracedList || (Style.isJavaScript() && NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, Keywords.kw_as)); ProbablyBracedList = ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() || NextTok->is(tok::l_paren))); // If there is a comma, semicolon or right paren after the closing // brace, we assume this is a braced initializer list. // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a // braced list in JS. ProbablyBracedList = ProbablyBracedList || NextTok->isOneOf(tok::comma, tok::period, tok::colon, tok::r_paren, tok::r_square, tok::ellipsis); // Distinguish between braced list in a constructor initializer list // followed by constructor body, or just adjacent blocks. ProbablyBracedList = ProbablyBracedList || (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok && LBraceStack.back().PrevTok->isOneOf(tok::identifier, tok::greater)); ProbablyBracedList = ProbablyBracedList || (NextTok->is(tok::identifier) && !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)); ProbablyBracedList = ProbablyBracedList || (NextTok->is(tok::semi) && (!ExpectClassBody || LBraceStack.size() != 1)); ProbablyBracedList = ProbablyBracedList || (NextTok->isBinaryOperator() && !NextIsObjCMethod); if (!Style.isCSharp() && NextTok->is(tok::l_square)) { // We can have an array subscript after a braced init // list, but C++11 attributes are expected after blocks. NextTok = Tokens->getNextToken(); ProbablyBracedList = NextTok->isNot(tok::l_square); } // Cpp macro definition body that is a nonempty braced list or block: if (IsCpp && Line->InMacroBody && PrevTok != FormatTok && !FormatTok->Previous && NextTok->is(tok::eof) && // A statement can end with only `;` (simple statement), a block // closing brace (compound statement), or `:` (label statement). // If PrevTok is a block opening brace, Tok ends an empty block. !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) { ProbablyBracedList = true; } } const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block; Tok->setBlockKind(BlockKind); LBrace->setBlockKind(BlockKind); } LBraceStack.pop_back(); break; case tok::identifier: if (Tok->isNot(TT_StatementMacro)) break; [[fallthrough]]; case tok::at: case tok::semi: case tok::kw_if: case tok::kw_while: case tok::kw_for: case tok::kw_switch: case tok::kw_try: case tok::kw___try: if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown)) LBraceStack.back().Tok->setBlockKind(BK_Block); break; default: break; } PrevTok = Tok; Tok = NextTok; } while (Tok->isNot(tok::eof) && !LBraceStack.empty()); // Assume other blocks for all unclosed opening braces. for (const auto &Entry : LBraceStack) if (Entry.Tok->is(BK_Unknown)) Entry.Tok->setBlockKind(BK_Block); FormatTok = Tokens->setPosition(StoredPosition); } // Sets the token type of the directly previous right brace. void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) { if (auto Prev = FormatTok->getPreviousNonComment(); Prev && Prev->is(tok::r_brace)) { Prev->setFinalizedType(Type); } } template static inline void hash_combine(std::size_t &seed, const T &v) { std::hash hasher; seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); } size_t UnwrappedLineParser::computePPHash() const { size_t h = 0; for (const auto &i : PPStack) { hash_combine(h, size_t(i.Kind)); hash_combine(h, i.Line); } return h; } // Checks whether \p ParsedLine might fit on a single line. 
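// The hash_combine/computePPHash pair above uses the well-known boost-style
// mixing step. A self-contained sketch of the same pattern (combineHash and
// ppHash are illustrative names, not part of this file):
//
//   #include <cstddef>
//   #include <functional>
//   #include <utility>
//   #include <vector>
//
//   template <typename T>
//   static void combineHash(std::size_t &Seed, const T &Value) {
//     Seed ^= std::hash<T>{}(Value) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2);
//   }
//
//   // Hashing a stack of (Kind, Line) entries, as computePPHash does:
//   static std::size_t ppHash(const std::vector<std::pair<int, size_t>> &S) {
//     std::size_t H = 0;
//     for (const auto &E : S) {
//       combineHash(H, E.first);
//       combineHash(H, E.second);
//     }
//     return H;
//   }
//
// parseBlock computes this hash before and after a block and links the
// opening and closing lines only when the preprocessor state is unchanged.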
If \p OpeningBrace // is not null, subtracts its length (plus the preceding space) when computing // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before // running the token annotator on it so that we can restore them afterward. bool UnwrappedLineParser::mightFitOnOneLine( UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { const auto ColumnLimit = Style.ColumnLimit; if (ColumnLimit == 0) return true; auto &Tokens = ParsedLine.Tokens; assert(!Tokens.empty()); const auto *LastToken = Tokens.back().Tok; assert(LastToken); SmallVector SavedTokens(Tokens.size()); int Index = 0; for (const auto &Token : Tokens) { assert(Token.Tok); auto &SavedToken = SavedTokens[Index++]; SavedToken.Tok = new FormatToken; SavedToken.Tok->copyFrom(*Token.Tok); SavedToken.Children = std::move(Token.Children); } AnnotatedLine Line(ParsedLine); assert(Line.Last == LastToken); TokenAnnotator Annotator(Style, Keywords); Annotator.annotate(Line); Annotator.calculateFormattingInformation(Line); auto Length = LastToken->TotalLength; if (OpeningBrace) { assert(OpeningBrace != Tokens.front().Tok); if (auto Prev = OpeningBrace->Previous; Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) { Length -= ColumnLimit; } Length -= OpeningBrace->TokenText.size() + 1; } if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) { assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace)); Length -= FirstToken->TokenText.size() + 1; } Index = 0; for (auto &Token : Tokens) { const auto &SavedToken = SavedTokens[Index++]; Token.Tok->copyFrom(*SavedToken.Tok); Token.Children = std::move(SavedToken.Children); delete SavedToken.Tok; } // If these change PPLevel needs to be used for get correct indentation. assert(!Line.InMacroBody); assert(!Line.InPPDirective); return Line.Level * Style.IndentWidth + Length <= ColumnLimit; } FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces, IfStmtKind *IfKind, bool UnindentWhitesmithsBraces) { auto HandleVerilogBlockLabel = [this]() { // ":" name if (Style.isVerilog() && FormatTok->is(tok::colon)) { nextToken(); if (Keywords.isVerilogIdentifier(*FormatTok)) nextToken(); } }; // Whether this is a Verilog-specific block that has a special header like a // module. const bool VerilogHierarchy = Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok); assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || (Style.isVerilog() && (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) && "'{' or macro block token expected"); FormatToken *Tok = FormatTok; const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); auto Index = CurrentLines->size(); const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); FormatTok->setBlockKind(BK_Block); // For Whitesmiths mode, jump to the next level prior to skipping over the // braces. if (!VerilogHierarchy && AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { ++Line->Level; } size_t PPStartHash = computePPHash(); const unsigned InitialLevel = Line->Level; if (VerilogHierarchy) { AddLevels += parseVerilogHierarchyHeader(); } else { nextToken(/*LevelDifference=*/AddLevels); HandleVerilogBlockLabel(); } // Bail out if there are too many levels. Otherwise, the stack might overflow. if (Line->Level > 300) return nullptr; if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); size_t NbPreprocessorDirectives = !parsingPPDirective() ? 
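// The decision in mightFitOnOneLine above boils down to a column check on a
// temporarily annotated copy of the line; the tokens are cloned first and
// restored afterwards so annotation has no lasting effect. In essence
// (illustrative restatement, not additional logic):
//
//   // Length: total width of the annotated line, already reduced by the
//   // opening-brace and leading r_brace adjustments computed above.
//   bool Fits = Line.Level * Style.IndentWidth + Length <= Style.ColumnLimit;
//
// A ColumnLimit of 0 short-circuits to "fits", matching the early return.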
PreprocessorDirectives.size() : 0; addUnwrappedLine(); size_t OpeningLineIndex = CurrentLines->empty() ? (UnwrappedLine::kInvalidIndex) : (CurrentLines->size() - 1 - NbPreprocessorDirectives); // Whitesmiths is weird here. The brace needs to be indented for the namespace // block, but the block itself may not be indented depending on the style // settings. This allows the format to back up one level in those cases. if (UnindentWhitesmithsBraces) --Line->Level; ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, MustBeDeclaration); if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) Line->Level += AddLevels; FormatToken *IfLBrace = nullptr; const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace); if (eof()) return IfLBrace; if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd) : FormatTok->isNot(tok::r_brace)) { Line->Level = InitialLevel; FormatTok->setBlockKind(BK_Block); return IfLBrace; } if (FormatTok->is(tok::r_brace)) { FormatTok->setBlockKind(BK_Block); if (Tok->is(TT_NamespaceLBrace)) FormatTok->setFinalizedType(TT_NamespaceRBrace); } const bool IsFunctionRBrace = FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace); auto RemoveBraces = [=]() mutable { if (!SimpleBlock) return false; assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); assert(FormatTok->is(tok::r_brace)); const bool WrappedOpeningBrace = !Tok->Previous; if (WrappedOpeningBrace && FollowedByComment) return false; const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional; if (KeepBraces && !HasRequiredIfBraces) return false; if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) { const FormatToken *Previous = Tokens->getPreviousToken(); assert(Previous); if (Previous->is(tok::r_brace) && !Previous->Optional) return false; } assert(!CurrentLines->empty()); auto &LastLine = CurrentLines->back(); if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine)) return false; if (Tok->is(TT_ElseLBrace)) return true; if (WrappedOpeningBrace) { assert(Index > 0); --Index; // The line above the wrapped l_brace. Tok = nullptr; } return mightFitOnOneLine((*CurrentLines)[Index], Tok); }; if (RemoveBraces()) { Tok->MatchingParen = FormatTok; FormatTok->MatchingParen = Tok; } size_t PPEndHash = computePPHash(); // Munch the closing brace. nextToken(/*LevelDifference=*/-AddLevels); // When this is a function block and there is an unnecessary semicolon // afterwards then mark it as optional (so the RemoveSemi pass can get rid of // it later). if (Style.RemoveSemicolon && IsFunctionRBrace) { while (FormatTok->is(tok::semi)) { FormatTok->Optional = true; nextToken(); } } HandleVerilogBlockLabel(); if (MacroBlock && FormatTok->is(tok::l_paren)) parseParens(); Line->Level = InitialLevel; if (FormatTok->is(tok::kw_noexcept)) { // A noexcept in a requires expression. nextToken(); } if (FormatTok->is(tok::arrow)) { // Following the } or noexcept we can find a trailing return type arrow // as part of an implicit conversion constraint. 
nextToken(); parseStructuralElement(); } if (MunchSemi && FormatTok->is(tok::semi)) nextToken(); if (PPStartHash == PPEndHash) { Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { // Update the opening line to add the forward reference as well (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = CurrentLines->size() - 1; } } return IfLBrace; } static bool isGoogScope(const UnwrappedLine &Line) { // FIXME: Closure-library specific stuff should not be hard-coded but be // configurable. if (Line.Tokens.size() < 4) return false; auto I = Line.Tokens.begin(); if (I->Tok->TokenText != "goog") return false; ++I; if (I->Tok->isNot(tok::period)) return false; ++I; if (I->Tok->TokenText != "scope") return false; ++I; return I->Tok->is(tok::l_paren); } static bool isIIFE(const UnwrappedLine &Line, const AdditionalKeywords &Keywords) { // Look for the start of an immediately invoked anonymous function. // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression // This is commonly done in JavaScript to create a new, anonymous scope. // Example: (function() { ... })() if (Line.Tokens.size() < 3) return false; auto I = Line.Tokens.begin(); if (I->Tok->isNot(tok::l_paren)) return false; ++I; if (I->Tok->isNot(Keywords.kw_function)) return false; ++I; return I->Tok->is(tok::l_paren); } static bool ShouldBreakBeforeBrace(const FormatStyle &Style, const FormatToken &InitialToken) { tok::TokenKind Kind = InitialToken.Tok.getKind(); if (InitialToken.is(TT_NamespaceMacro)) Kind = tok::kw_namespace; switch (Kind) { case tok::kw_namespace: return Style.BraceWrapping.AfterNamespace; case tok::kw_class: return Style.BraceWrapping.AfterClass; case tok::kw_union: return Style.BraceWrapping.AfterUnion; case tok::kw_struct: return Style.BraceWrapping.AfterStruct; case tok::kw_enum: return Style.BraceWrapping.AfterEnum; default: return false; } } void UnwrappedLineParser::parseChildBlock() { assert(FormatTok->is(tok::l_brace)); FormatTok->setBlockKind(BK_Block); const FormatToken *OpeningBrace = FormatTok; nextToken(); { bool SkipIndent = (Style.isJavaScript() && (isGoogScope(*Line) || isIIFE(*Line, Keywords))); ScopedLineState LineState(*this); ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, /*MustBeDeclaration=*/false); Line->Level += SkipIndent ? 0 : 1; parseLevel(OpeningBrace); flushComments(isOnNewLine(*FormatTok)); Line->Level -= SkipIndent ? 
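// isGoogScope and isIIFE above both pattern-match the first few tokens of
// the current unwrapped line. A minimal sketch of the IIFE case
// (matchesIIFEStart is an illustrative name, not part of this file):
//
//   // Matches the start of `(function() { ... })()`.
//   static bool matchesIIFEStart(const UnwrappedLine &Line,
//                                const AdditionalKeywords &Keywords) {
//     auto I = Line.Tokens.begin(), E = Line.Tokens.end();
//     if (I == E || I->Tok->isNot(tok::l_paren))
//       return false;
//     if (++I == E || I->Tok->isNot(Keywords.kw_function))
//       return false;
//     return ++I != E && I->Tok->is(tok::l_paren);
//   }
//
// When either pattern matches, the SkipIndent flag here keeps the wrapped
// body from gaining an extra indentation level.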
0 : 1; } nextToken(); } void UnwrappedLineParser::parsePPDirective() { assert(FormatTok->is(tok::hash) && "'#' expected"); ScopedMacroState MacroState(*Line, Tokens, FormatTok); nextToken(); if (!FormatTok->Tok.getIdentifierInfo()) { parsePPUnknown(); return; } switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { case tok::pp_define: parsePPDefine(); return; case tok::pp_if: parsePPIf(/*IfDef=*/false); break; case tok::pp_ifdef: case tok::pp_ifndef: parsePPIf(/*IfDef=*/true); break; case tok::pp_else: case tok::pp_elifdef: case tok::pp_elifndef: case tok::pp_elif: parsePPElse(); break; case tok::pp_endif: parsePPEndIf(); break; case tok::pp_pragma: parsePPPragma(); break; default: parsePPUnknown(); break; } } void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { size_t Line = CurrentLines->size(); if (CurrentLines == &PreprocessorDirectives) Line += Lines.size(); if (Unreachable || (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { PPStack.push_back({PP_Unreachable, Line}); } else { PPStack.push_back({PP_Conditional, Line}); } } void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { ++PPBranchLevel; assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { PPLevelBranchIndex.push_back(0); PPLevelBranchCount.push_back(0); } PPChainBranchIndex.push(Unreachable ? -1 : 0); bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; conditionalCompilationCondition(Unreachable || Skip); } void UnwrappedLineParser::conditionalCompilationAlternative() { if (!PPStack.empty()) PPStack.pop_back(); assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); if (!PPChainBranchIndex.empty()) ++PPChainBranchIndex.top(); conditionalCompilationCondition( PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); } void UnwrappedLineParser::conditionalCompilationEnd() { assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; } // Guard against #endif's without #if. if (PPBranchLevel > -1) --PPBranchLevel; if (!PPChainBranchIndex.empty()) PPChainBranchIndex.pop(); if (!PPStack.empty()) PPStack.pop_back(); } void UnwrappedLineParser::parsePPIf(bool IfDef) { bool IfNDef = FormatTok->is(tok::pp_ifndef); nextToken(); bool Unreachable = false; if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) Unreachable = true; if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") Unreachable = true; conditionalCompilationStart(Unreachable); FormatToken *IfCondition = FormatTok; // If there's a #ifndef on the first line, and the only lines before it are // comments, it could be an include guard. bool MaybeIncludeGuard = IfNDef; if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { for (auto &Line : Lines) { if (Line.Tokens.front().Tok->isNot(tok::comment)) { MaybeIncludeGuard = false; IncludeGuard = IG_Rejected; break; } } } --PPBranchLevel; parsePPUnknown(); ++PPBranchLevel; if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { IncludeGuard = IG_IfNdefed; IncludeGuardToken = IfCondition; } } void UnwrappedLineParser::parsePPElse() { // If a potential include guard has an #else, it's not an include guard. 
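// A worked example of the include-guard states used here (the transitions
// are spread over parsePPIf, parsePPDefine, parsePPElse and parsePPEndIf;
// this only summarizes the common path):
//
//   // header.h
//   #ifndef HEADER_H   // first non-comment line        -> IG_IfNdefed
//   #define HEADER_H   // defines the same macro        -> IG_Defined
//   ...                // body
//   #endif             // last token in the file        -> IG_Found
//
// Code before the #ifndef, or an #else attached to the guard, drops the
// state to IG_Rejected; a recognized guard keeps the guarded body from
// gaining an extra level, which matters only when IndentPPDirectives is
// enabled.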
if (IncludeGuard == IG_Defined && PPBranchLevel == 0) IncludeGuard = IG_Rejected; // Don't crash when there is an #else without an #if. assert(PPBranchLevel >= -1); if (PPBranchLevel == -1) conditionalCompilationStart(/*Unreachable=*/true); conditionalCompilationAlternative(); --PPBranchLevel; parsePPUnknown(); ++PPBranchLevel; } void UnwrappedLineParser::parsePPEndIf() { conditionalCompilationEnd(); parsePPUnknown(); // If the #endif of a potential include guard is the last thing in the file, // then we found an include guard. if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && Style.IndentPPDirectives != FormatStyle::PPDIS_None) { IncludeGuard = IG_Found; } } void UnwrappedLineParser::parsePPDefine() { nextToken(); if (!FormatTok->Tok.getIdentifierInfo()) { IncludeGuard = IG_Rejected; IncludeGuardToken = nullptr; parsePPUnknown(); return; } if (IncludeGuard == IG_IfNdefed && IncludeGuardToken->TokenText == FormatTok->TokenText) { IncludeGuard = IG_Defined; IncludeGuardToken = nullptr; for (auto &Line : Lines) { if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { IncludeGuard = IG_Rejected; break; } } } // In the context of a define, even keywords should be treated as normal // identifiers. Setting the kind to identifier is not enough, because we need // to treat additional keywords like __except as well, which are already // identifiers. Setting the identifier info to null interferes with include // guard processing above, and changes preprocessing nesting. FormatTok->Tok.setKind(tok::identifier); FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); nextToken(); if (FormatTok->Tok.getKind() == tok::l_paren && !FormatTok->hasWhitespaceBefore()) { parseParens(); } if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) Line->Level += PPBranchLevel + 1; addUnwrappedLine(); ++Line->Level; Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 0 : 1); assert((int)Line->PPLevel >= 0); Line->InMacroBody = true; if (Style.SkipMacroDefinitionBody) { while (!eof()) { FormatTok->Finalized = true; FormatTok = Tokens->getNextToken(); } addUnwrappedLine(); return; } // Errors during a preprocessor directive can only affect the layout of the // preprocessor directive, and thus we ignore them. An alternative approach // would be to use the same approach we use on the file level (no // re-indentation if there was a structural error) within the macro // definition. parseFile(); } void UnwrappedLineParser::parsePPPragma() { Line->InPragmaDirective = true; parsePPUnknown(); } void UnwrappedLineParser::parsePPUnknown() { do { nextToken(); } while (!eof()); if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) Line->Level += PPBranchLevel + 1; addUnwrappedLine(); } // Here we exclude certain tokens that are not usually the first token in an // unwrapped line. This is used in attempt to distinguish macro calls without // trailing semicolons from other constructs split to several lines. static bool tokenCanStartNewLine(const FormatToken &Tok) { // Semicolon can be a null-statement, l_square can be a start of a macro or // a C++11 attribute, but this doesn't seem to be common. return !Tok.isOneOf(tok::semi, tok::l_brace, // Tokens that can only be used as binary operators and a // part of overloaded operator names. 
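// Illustrative inputs for the exclusion list that follows:
//
//   LOG_ERROR(x)
//   int y = 0;      // 'int' can start an unwrapped line, so the macro call
//                   //   above may be closed as a line of its own (see the
//                   //   uppercase-macro handling in parseStructuralElement)
//
//   LOG_ERROR(x)
//       .flush();   // '.' is excluded below, so this stays one expression
//                   //   split over two lines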
tok::period, tok::periodstar, tok::arrow, tok::arrowstar, tok::less, tok::greater, tok::slash, tok::percent, tok::lessless, tok::greatergreater, tok::equal, tok::plusequal, tok::minusequal, tok::starequal, tok::slashequal, tok::percentequal, tok::ampequal, tok::pipeequal, tok::caretequal, tok::greatergreaterequal, tok::lesslessequal, // Colon is used in labels, base class lists, initializer // lists, range-based for loops, ternary operator, but // should never be the first token in an unwrapped line. tok::colon, // 'noexcept' is a trailing annotation. tok::kw_noexcept); } static bool mustBeJSIdent(const AdditionalKeywords &Keywords, const FormatToken *FormatTok) { // FIXME: This returns true for C/C++ keywords like 'struct'. return FormatTok->is(tok::identifier) && (!FormatTok->Tok.getIdentifierInfo() || !FormatTok->isOneOf( Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, Keywords.kw_let, Keywords.kw_var, tok::kw_const, Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); } static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, const FormatToken *FormatTok) { return FormatTok->Tok.isLiteral() || FormatTok->isOneOf(tok::kw_true, tok::kw_false) || mustBeJSIdent(Keywords, FormatTok); } // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement // when encountered after a value (see mustBeJSIdentOrValue). static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, const FormatToken *FormatTok) { return FormatTok->isOneOf( tok::kw_return, Keywords.kw_yield, // conditionals tok::kw_if, tok::kw_else, // loops tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, // switch/case tok::kw_switch, tok::kw_case, // exceptions tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, // declaration tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, Keywords.kw_async, Keywords.kw_function, // import/export Keywords.kw_import, tok::kw_export); } // Checks whether a token is a type in K&R C (aka C78). 
static bool isC78Type(const FormatToken &Tok) { return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, tok::kw_unsigned, tok::kw_float, tok::kw_double, tok::identifier); } // This function checks whether a token starts the first parameter declaration // in a K&R C (aka C78) function definition, e.g.: // int f(a, b) // short a, b; // { // return a + b; // } static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, const FormatToken *FuncName) { assert(Tok); assert(Next); assert(FuncName); if (FuncName->isNot(tok::identifier)) return false; const FormatToken *Prev = FuncName->Previous; if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) return false; if (!isC78Type(*Tok) && !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { return false; } if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) return false; Tok = Tok->Previous; if (!Tok || Tok->isNot(tok::r_paren)) return false; Tok = Tok->Previous; if (!Tok || Tok->isNot(tok::identifier)) return false; return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); } bool UnwrappedLineParser::parseModuleImport() { assert(FormatTok->is(Keywords.kw_import) && "'import' expected"); if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true); !Token->Tok.getIdentifierInfo() && !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) { return false; } nextToken(); while (!eof()) { if (FormatTok->is(tok::colon)) { FormatTok->setFinalizedType(TT_ModulePartitionColon); } // Handle import as we would an include statement. else if (FormatTok->is(tok::less)) { nextToken(); while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { // Mark tokens up to the trailing line comments as implicit string // literals. if (FormatTok->isNot(tok::comment) && !FormatTok->TokenText.starts_with("//")) { FormatTok->setFinalizedType(TT_ImplicitStringLiteral); } nextToken(); } } if (FormatTok->is(tok::semi)) { nextToken(); break; } nextToken(); } addUnwrappedLine(); return true; } // readTokenWithJavaScriptASI reads the next token and terminates the current // line if JavaScript Automatic Semicolon Insertion must // happen between the current token and the next token. // // This method is conservative - it cannot cover all edge cases of JavaScript, // but only aims to correctly handle certain well known cases. It *must not* // return true in speculative cases. void UnwrappedLineParser::readTokenWithJavaScriptASI() { FormatToken *Previous = FormatTok; readToken(); FormatToken *Next = FormatTok; bool IsOnSameLine = CommentsBeforeNextToken.empty() ? Next->NewlinesBefore == 0 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; if (IsOnSameLine) return; bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); bool PreviousStartsTemplateExpr = Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${"); if (PreviousMustBeValue || Previous->is(tok::r_paren)) { // If the line contains an '@' sign, the previous token might be an // annotation, which can precede another identifier/value. 
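// To make the C78 heuristic above concrete, this is the token walk it
// performs on the example from its comment (annotation only, no new logic):
//
//   int f(a, b)     // FuncName = 'f', preceded by the C78 type 'int'
//   short a, b;     // Tok = 'short' (a C78 type), Next = 'a' (identifier);
//   {               //   walking back from 'short' we see ')', then 'b',
//     return a + b; //   then ',', so this is a parameter declaration and
//   }               //   the unwrapped line is broken after the ')'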
bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { return LineNode.Tok->is(tok::at); }); if (HasAt) return; } if (Next->is(tok::exclaim) && PreviousMustBeValue) return addUnwrappedLine(); bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); bool NextEndsTemplateExpr = Next->is(TT_TemplateString) && Next->TokenText.starts_with("}"); if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && (PreviousMustBeValue || Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, tok::minusminus))) { return addUnwrappedLine(); } if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && isJSDeclOrStmt(Keywords, Next)) { return addUnwrappedLine(); } } void UnwrappedLineParser::parseStructuralElement( const FormatToken *OpeningBrace, IfStmtKind *IfKind, FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) { if (Style.Language == FormatStyle::LK_TableGen && FormatTok->is(tok::pp_include)) { nextToken(); if (FormatTok->is(tok::string_literal)) nextToken(); addUnwrappedLine(); return; } if (IsCpp) { while (FormatTok->is(tok::l_square) && handleCppAttributes()) { } } else if (Style.isVerilog()) { if (Keywords.isVerilogStructuredProcedure(*FormatTok)) { parseForOrWhileLoop(/*HasParens=*/false); return; } if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) { parseForOrWhileLoop(); return; } if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, Keywords.kw_assume, Keywords.kw_cover)) { parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true); return; } // Skip things that can exist before keywords like 'if' and 'case'. while (true) { if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique, Keywords.kw_unique0)) { nextToken(); } else if (FormatTok->is(tok::l_paren) && Tokens->peekNextToken()->is(tok::star)) { parseParens(); } else { break; } } } // Tokens that only make sense at the beginning of a line. if (FormatTok->isAccessSpecifierKeyword()) { if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || Style.isCSharp()) { nextToken(); } else { parseAccessSpecifier(); } return; } switch (FormatTok->Tok.getKind()) { case tok::kw_asm: nextToken(); if (FormatTok->is(tok::l_brace)) { FormatTok->setFinalizedType(TT_InlineASMBrace); nextToken(); while (FormatTok && !eof()) { if (FormatTok->is(tok::r_brace)) { FormatTok->setFinalizedType(TT_InlineASMBrace); nextToken(); addUnwrappedLine(); break; } FormatTok->Finalized = true; nextToken(); } } break; case tok::kw_namespace: parseNamespace(); return; case tok::kw_if: { if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. break; } FormatToken *Tok = parseIfThenElse(IfKind); if (IfLeftBrace) *IfLeftBrace = Tok; return; } case tok::kw_for: case tok::kw_while: if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. break; } parseForOrWhileLoop(); return; case tok::kw_do: if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. break; } parseDoWhile(); if (HasDoWhile) *HasDoWhile = true; return; case tok::kw_switch: if (Style.isJavaScript() && Line->MustBeDeclaration) { // 'switch: string' field declaration. break; } parseSwitch(/*IsExpr=*/false); return; case tok::kw_default: { // In Verilog default along with other labels are handled in the next loop. if (Style.isVerilog()) break; if (Style.isJavaScript() && Line->MustBeDeclaration) { // 'default: string' field declaration. 
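// The repeated `Style.isJavaScript() && Line->MustBeDeclaration` guards in
// this switch exist because, inside a TypeScript class or interface body,
// these keywords are simply property names (illustrative input):
//
//   interface Legacy {
//     switch: string;    // not a switch statement
//     case: number;      // not a case label
//     default: boolean;  // not a default label
//   }
//
// In that context the parser breaks out of the keyword case and handles the
// token like an ordinary field declaration.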
break; } auto *Default = FormatTok; nextToken(); if (FormatTok->is(tok::colon)) { FormatTok->setFinalizedType(TT_CaseLabelColon); parseLabel(); return; } if (FormatTok->is(tok::arrow)) { FormatTok->setFinalizedType(TT_CaseLabelArrow); Default->setFinalizedType(TT_SwitchExpressionLabel); parseLabel(); return; } // e.g. "default void f() {}" in a Java interface. break; } case tok::kw_case: // Proto: there are no switch/case statements. if (Style.Language == FormatStyle::LK_Proto) { nextToken(); return; } if (Style.isVerilog()) { parseBlock(); addUnwrappedLine(); return; } if (Style.isJavaScript() && Line->MustBeDeclaration) { // 'case: string' field declaration. nextToken(); break; } parseCaseLabel(); return; case tok::kw_try: case tok::kw___try: if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. break; } parseTryCatch(); return; case tok::kw_extern: nextToken(); if (Style.isVerilog()) { // In Verilog and extern module declaration looks like a start of module. // But there is no body and endmodule. So we handle it separately. if (Keywords.isVerilogHierarchy(*FormatTok)) { parseVerilogHierarchyHeader(); return; } } else if (FormatTok->is(tok::string_literal)) { nextToken(); if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterExternBlock) addUnwrappedLine(); // Either we indent or for backwards compatibility we follow the // AfterExternBlock style. unsigned AddLevels = (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || (Style.BraceWrapping.AfterExternBlock && Style.IndentExternBlock == FormatStyle::IEBS_AfterExternBlock) ? 1u : 0u; parseBlock(/*MustBeDeclaration=*/true, AddLevels); addUnwrappedLine(); return; } } break; case tok::kw_export: if (Style.isJavaScript()) { parseJavaScriptEs6ImportExport(); return; } if (IsCpp) { nextToken(); if (FormatTok->is(tok::kw_namespace)) { parseNamespace(); return; } if (FormatTok->is(Keywords.kw_import) && parseModuleImport()) return; } break; case tok::kw_inline: nextToken(); if (FormatTok->is(tok::kw_namespace)) { parseNamespace(); return; } break; case tok::identifier: if (FormatTok->is(TT_ForEachMacro)) { parseForOrWhileLoop(); return; } if (FormatTok->is(TT_MacroBlockBegin)) { parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/false); return; } if (FormatTok->is(Keywords.kw_import)) { if (Style.isJavaScript()) { parseJavaScriptEs6ImportExport(); return; } if (Style.Language == FormatStyle::LK_Proto) { nextToken(); if (FormatTok->is(tok::kw_public)) nextToken(); if (FormatTok->isNot(tok::string_literal)) return; nextToken(); if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); return; } if (IsCpp && parseModuleImport()) return; } if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, Keywords.kw_slots, Keywords.kw_qslots)) { nextToken(); if (FormatTok->is(tok::colon)) { nextToken(); addUnwrappedLine(); return; } } if (IsCpp && FormatTok->is(TT_StatementMacro)) { parseStatementMacro(); return; } if (IsCpp && FormatTok->is(TT_NamespaceMacro)) { parseNamespace(); return; } // In Verilog labels can be any expression, so we don't do them here. // JS doesn't have macros, and within classes colons indicate fields, not // labels. // TableGen doesn't have labels. 
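// An illustrative input for the extern-block handling above:
//
//   extern "C" {
//   void c_api(void);
//   }
//
// The body gets one extra level only when IndentExternBlock is IEBS_Indent,
// or when the brace is wrapped (AfterExternBlock) and IndentExternBlock is
// left at IEBS_AfterExternBlock; otherwise AddLevels stays 0 and the body
// keeps the surrounding indentation.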
if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() && Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) { nextToken(); if (!Line->InMacroBody || CurrentLines->size() > 1) Line->Tokens.begin()->Tok->MustBreakBefore = true; FormatTok->setFinalizedType(TT_GotoLabelColon); parseLabel(!Style.IndentGotoLabels); if (HasLabel) *HasLabel = true; return; } // In all other cases, parse the declaration. break; default: break; } for (const bool InRequiresExpression = OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); !eof();) { if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) { if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true); Next && Next->isBinaryOperator()) { FormatTok->Tok.setKind(tok::identifier); } } const FormatToken *Previous = FormatTok->Previous; switch (FormatTok->Tok.getKind()) { case tok::at: nextToken(); if (FormatTok->is(tok::l_brace)) { nextToken(); parseBracedList(); break; } else if (Style.Language == FormatStyle::LK_Java && FormatTok->is(Keywords.kw_interface)) { nextToken(); break; } switch (FormatTok->Tok.getObjCKeywordID()) { case tok::objc_public: case tok::objc_protected: case tok::objc_package: case tok::objc_private: return parseAccessSpecifier(); case tok::objc_interface: case tok::objc_implementation: return parseObjCInterfaceOrImplementation(); case tok::objc_protocol: if (parseObjCProtocol()) return; break; case tok::objc_end: return; // Handled by the caller. case tok::objc_optional: case tok::objc_required: nextToken(); addUnwrappedLine(); return; case tok::objc_autoreleasepool: nextToken(); if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) { addUnwrappedLine(); } parseBlock(); } addUnwrappedLine(); return; case tok::objc_synchronized: nextToken(); if (FormatTok->is(tok::l_paren)) { // Skip synchronization object parseParens(); } if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) { addUnwrappedLine(); } parseBlock(); } addUnwrappedLine(); return; case tok::objc_try: // This branch isn't strictly necessary (the kw_try case below would // do this too after the tok::at is parsed above). But be explicit. parseTryCatch(); return; default: break; } break; case tok::kw_requires: { if (IsCpp) { bool ParsedClause = parseRequires(); if (ParsedClause) return; } else { nextToken(); } break; } case tok::kw_enum: // Ignore if this is part of "template enum" or // "template <..., enum ...>". if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) { nextToken(); break; } // parseEnum falls through and does not yet add an unwrapped line as an // enum definition can start a structural element. if (!parseEnum()) break; // This only applies to C++ and Verilog. if (!IsCpp && !Style.isVerilog()) { addUnwrappedLine(); return; } break; case tok::kw_typedef: nextToken(); if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, Keywords.kw_CF_CLOSED_ENUM, Keywords.kw_NS_CLOSED_ENUM)) { parseEnum(); } break; case tok::kw_class: if (Style.isVerilog()) { parseBlock(); addUnwrappedLine(); return; } if (Style.isTableGen()) { // Do nothing special. In this case the l_brace becomes FunctionLBrace. // This is same as def and so on. 
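// The goto-label path above applies to inputs like (illustrative):
//
//   retry:
//     if (!try_again())
//       goto retry;
//
// The ':' is finalized as TT_GotoLabelColon, and parseLabel is told to
// left-align the label whenever IndentGotoLabels is false.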
nextToken(); break; } [[fallthrough]]; case tok::kw_struct: case tok::kw_union: if (parseStructLike()) return; break; case tok::kw_decltype: nextToken(); if (FormatTok->is(tok::l_paren)) { parseParens(); assert(FormatTok->Previous); if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, tok::l_paren)) { Line->SeenDecltypeAuto = true; } } break; case tok::period: nextToken(); // In Java, classes have an implicit static member "class". if (Style.Language == FormatStyle::LK_Java && FormatTok && FormatTok->is(tok::kw_class)) { nextToken(); } if (Style.isJavaScript() && FormatTok && FormatTok->Tok.getIdentifierInfo()) { // JavaScript only has pseudo keywords, all keywords are allowed to // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 nextToken(); } break; case tok::semi: nextToken(); addUnwrappedLine(); return; case tok::r_brace: addUnwrappedLine(); return; case tok::l_paren: { parseParens(); // Break the unwrapped line if a K&R C function definition has a parameter // declaration. if (OpeningBrace || !IsCpp || !Previous || eof()) break; if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(/*SkipComment=*/true), Previous)) { addUnwrappedLine(); return; } break; } case tok::kw_operator: nextToken(); if (FormatTok->isBinaryOperator()) nextToken(); break; case tok::caret: nextToken(); // Block return type. if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) { nextToken(); // Return types: pointers are ok too. while (FormatTok->is(tok::star)) nextToken(); } // Block argument list. if (FormatTok->is(tok::l_paren)) parseParens(); // Block body. if (FormatTok->is(tok::l_brace)) parseChildBlock(); break; case tok::l_brace: if (InRequiresExpression) FormatTok->setFinalizedType(TT_BracedListLBrace); if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { IsDecltypeAutoFunction = Line->SeenDecltypeAuto; // A block outside of parentheses must be the last part of a // structural element. // FIXME: Figure out cases where this is not true, and add projections // for them (the one we know is missing are lambdas). if (Style.Language == FormatStyle::LK_Java && Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { // If necessary, we could set the type to something different than // TT_FunctionLBrace. if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) { addUnwrappedLine(); } } else if (Style.BraceWrapping.AfterFunction) { addUnwrappedLine(); } if (!Previous || Previous->isNot(TT_TypeDeclarationParen)) FormatTok->setFinalizedType(TT_FunctionLBrace); parseBlock(); IsDecltypeAutoFunction = false; addUnwrappedLine(); return; } // Otherwise this was a braced init list, and the structural // element continues. break; case tok::kw_try: if (Style.isJavaScript() && Line->MustBeDeclaration) { // field/method declaration. nextToken(); break; } // We arrive here when parsing function-try blocks. if (Style.BraceWrapping.AfterFunction) addUnwrappedLine(); parseTryCatch(); return; case tok::identifier: { if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && Line->MustBeDeclaration) { addUnwrappedLine(); parseCSharpGenericTypeConstraint(); break; } if (FormatTok->is(TT_MacroBlockEnd)) { addUnwrappedLine(); return; } // Function declarations (as opposed to function expressions) are parsed // on their own unwrapped line by continuing this loop. Function // expressions (functions that are not on their own line) must not create // a new unwrapped line, so they are special cased below. 
size_t TokenCount = Line->Tokens.size(); if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && (TokenCount > 1 || (TokenCount == 1 && Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) { tryToParseJSFunction(); break; } if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && FormatTok->is(Keywords.kw_interface)) { if (Style.isJavaScript()) { // In JavaScript/TypeScript, "interface" can be used as a standalone // identifier, e.g. in `var interface = 1;`. If "interface" is // followed by another identifier, it is very like to be an actual // interface declaration. unsigned StoredPosition = Tokens->getPosition(); FormatToken *Next = Tokens->getNextToken(); FormatTok = Tokens->setPosition(StoredPosition); if (!mustBeJSIdent(Keywords, Next)) { nextToken(); break; } } parseRecord(); addUnwrappedLine(); return; } if (Style.isVerilog()) { if (FormatTok->is(Keywords.kw_table)) { parseVerilogTable(); return; } if (Keywords.isVerilogBegin(*FormatTok) || Keywords.isVerilogHierarchy(*FormatTok)) { parseBlock(); addUnwrappedLine(); return; } } if (!IsCpp && FormatTok->is(Keywords.kw_interface)) { if (parseStructLike()) return; break; } if (IsCpp && FormatTok->is(TT_StatementMacro)) { parseStatementMacro(); return; } // See if the following token should start a new unwrapped line. StringRef Text = FormatTok->TokenText; FormatToken *PreviousToken = FormatTok; nextToken(); // JS doesn't have macros, and within classes colons indicate fields, not // labels. if (Style.isJavaScript()) break; auto OneTokenSoFar = [&]() { auto I = Line->Tokens.begin(), E = Line->Tokens.end(); while (I != E && I->Tok->is(tok::comment)) ++I; if (Style.isVerilog()) while (I != E && I->Tok->is(tok::hash)) ++I; return I != E && (++I == E); }; if (OneTokenSoFar()) { // Recognize function-like macro usages without trailing semicolon as // well as free-standing macros like Q_OBJECT. bool FunctionLike = FormatTok->is(tok::l_paren); if (FunctionLike) parseParens(); bool FollowedByNewline = CommentsBeforeNextToken.empty() ? FormatTok->NewlinesBefore > 0 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { if (PreviousToken->isNot(TT_UntouchableMacroFunc)) PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); addUnwrappedLine(); return; } } break; } case tok::equal: if ((Style.isJavaScript() || Style.isCSharp()) && FormatTok->is(TT_FatArrow)) { tryToParseChildBlock(); break; } nextToken(); if (FormatTok->is(tok::l_brace)) { // Block kind should probably be set to BK_BracedInit for any language. // C# needs this change to ensure that array initialisers and object // initialisers are indented the same way. if (Style.isCSharp()) FormatTok->setBlockKind(BK_BracedInit); // TableGen's defset statement has syntax of the form, // `defset = { ... 
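// Concrete cases for the macro heuristic above (illustrative inputs):
//
//   Q_OBJECT               // a single all-uppercase token followed by a
//   void f();              //   newline and a token that can start a line
//                          //   -> freestanding macro, line ends here
//
//   DECLARE_THING(Foo)     // uppercase, function-like, no trailing ';'
//   int x;                 //   -> TT_FunctionLikeOrFreestandingMacro
//
//   GETOBJ(x)
//       ->doSomething();   // '->' cannot start a line, so no break is
//                          //   forced and this remains one statement
//
// Lowercase names never match because of the Text == Text.upper() check.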
}` if (Style.isTableGen() && Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) { FormatTok->setFinalizedType(TT_FunctionLBrace); parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/false); addUnwrappedLine(); break; } nextToken(); parseBracedList(); } else if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::less)) { nextToken(); parseBracedList(/*IsAngleBracket=*/true); } break; case tok::l_square: parseSquare(); break; case tok::kw_new: parseNew(); break; case tok::kw_switch: if (Style.Language == FormatStyle::LK_Java) parseSwitch(/*IsExpr=*/true); nextToken(); break; case tok::kw_case: // Proto: there are no switch/case statements. if (Style.Language == FormatStyle::LK_Proto) { nextToken(); return; } // In Verilog switch is called case. if (Style.isVerilog()) { parseBlock(); addUnwrappedLine(); return; } if (Style.isJavaScript() && Line->MustBeDeclaration) { // 'case: string' field declaration. nextToken(); break; } parseCaseLabel(); break; case tok::kw_default: nextToken(); if (Style.isVerilog()) { if (FormatTok->is(tok::colon)) { // The label will be handled in the next iteration. break; } if (FormatTok->is(Keywords.kw_clocking)) { // A default clocking block. parseBlock(); addUnwrappedLine(); return; } parseVerilogCaseLabel(); return; } break; case tok::colon: nextToken(); if (Style.isVerilog()) { parseVerilogCaseLabel(); return; } break; case tok::greater: nextToken(); if (FormatTok->is(tok::l_brace)) FormatTok->Previous->setFinalizedType(TT_TemplateCloser); break; default: nextToken(); break; } } } bool UnwrappedLineParser::tryToParsePropertyAccessor() { assert(FormatTok->is(tok::l_brace)); if (!Style.isCSharp()) return false; // See if it's a property accessor. if (FormatTok->Previous->isNot(tok::identifier)) return false; // See if we are inside a property accessor. // // Record the current tokenPosition so that we can advance and // reset the current token. `Next` is not set yet so we need // another way to advance along the token stream. unsigned int StoredPosition = Tokens->getPosition(); FormatToken *Tok = Tokens->getNextToken(); // A trivial property accessor is of the form: // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] } // Track these as they do not require line breaks to be introduced. 
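// For reference, the accessor shapes the scan below distinguishes
// (illustrative C# inputs):
//
//   public string Name { get; set; }     // trivial: only get/set/init,
//                                        //   access specifiers or ';' before
//                                        //   the '}', so no breaks are added
//
//   public string Name {                 // non-trivial: each get/set is put
//     get { return _name; }              //   on its own unwrapped line, and
//     set { _name = value; }             //   the '{' may be wrapped per
//   }                                    //   BraceWrapping.AfterFunction
//
// Braces that contain none of get/set/init at all are not treated as
// property accessors and fall back to ordinary block parsing.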
bool HasSpecialAccessor = false; bool IsTrivialPropertyAccessor = true; while (!eof()) { if (Tok->isAccessSpecifierKeyword() || Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get, Keywords.kw_init, Keywords.kw_set)) { if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set)) HasSpecialAccessor = true; Tok = Tokens->getNextToken(); continue; } if (Tok->isNot(tok::r_brace)) IsTrivialPropertyAccessor = false; break; } if (!HasSpecialAccessor) { Tokens->setPosition(StoredPosition); return false; } // Try to parse the property accessor: // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties Tokens->setPosition(StoredPosition); if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) addUnwrappedLine(); nextToken(); do { switch (FormatTok->Tok.getKind()) { case tok::r_brace: nextToken(); if (FormatTok->is(tok::equal)) { while (!eof() && FormatTok->isNot(tok::semi)) nextToken(); nextToken(); } addUnwrappedLine(); return true; case tok::l_brace: ++Line->Level; parseBlock(/*MustBeDeclaration=*/true); addUnwrappedLine(); --Line->Level; break; case tok::equal: if (FormatTok->is(TT_FatArrow)) { ++Line->Level; do { nextToken(); } while (!eof() && FormatTok->isNot(tok::semi)); nextToken(); addUnwrappedLine(); --Line->Level; break; } nextToken(); break; default: if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set) && !IsTrivialPropertyAccessor) { // Non-trivial get/set needs to be on its own line. addUnwrappedLine(); } nextToken(); } } while (!eof()); // Unreachable for well-formed code (paired '{' and '}'). return true; } bool UnwrappedLineParser::tryToParseLambda() { assert(FormatTok->is(tok::l_square)); if (!IsCpp) { nextToken(); return false; } FormatToken &LSquare = *FormatTok; if (!tryToParseLambdaIntroducer()) return false; bool SeenArrow = false; bool InTemplateParameterList = false; while (FormatTok->isNot(tok::l_brace)) { if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) { nextToken(); continue; } switch (FormatTok->Tok.getKind()) { case tok::l_brace: break; case tok::l_paren: parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference); break; case tok::l_square: parseSquare(); break; case tok::less: assert(FormatTok->Previous); if (FormatTok->Previous->is(tok::r_square)) InTemplateParameterList = true; nextToken(); break; case tok::kw_auto: case tok::kw_class: case tok::kw_struct: case tok::kw_union: case tok::kw_template: case tok::kw_typename: case tok::amp: case tok::star: case tok::kw_const: case tok::kw_constexpr: case tok::kw_consteval: case tok::comma: case tok::greater: case tok::identifier: case tok::numeric_constant: case tok::coloncolon: case tok::kw_mutable: case tok::kw_noexcept: case tok::kw_static: nextToken(); break; // Specialization of a template with an integer parameter can contain // arithmetic, logical, comparison and ternary operators. // // FIXME: This also accepts sequences of operators that are not in the scope // of a template argument list. // // In a C++ lambda a template type can only occur after an arrow. We use // this as an heuristic to distinguish between Objective-C expressions // followed by an `a->b` expression, such as: // ([obj func:arg] + a->b) // Otherwise the code below would parse as a lambda. 
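// Two inputs that illustrate the arrow heuristic described above:
//
//   auto L = []() -> Array<N + 1> { return {}; };
//       // the '+' is accepted only because a lambda arrow has already been
//       //   seen (Array and N are illustrative names)
//
//   ([obj doSomething:arg] + a->b)
//       // when the '+' is reached no arrow and no template parameter list
//       //   have been seen, so tryToParseLambda gives up and nothing here
//       //   is marked as a lambda; the ObjC message send is left alone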
case tok::plus: case tok::minus: case tok::exclaim: case tok::tilde: case tok::slash: case tok::percent: case tok::lessless: case tok::pipe: case tok::pipepipe: case tok::ampamp: case tok::caret: case tok::equalequal: case tok::exclaimequal: case tok::greaterequal: case tok::lessequal: case tok::question: case tok::colon: case tok::ellipsis: case tok::kw_true: case tok::kw_false: if (SeenArrow || InTemplateParameterList) { nextToken(); break; } return true; case tok::arrow: // This might or might not actually be a lambda arrow (this could be an // ObjC method invocation followed by a dereferencing arrow). We might // reset this back to TT_Unknown in TokenAnnotator. FormatTok->setFinalizedType(TT_LambdaArrow); SeenArrow = true; nextToken(); break; case tok::kw_requires: { auto *RequiresToken = FormatTok; nextToken(); parseRequiresClause(RequiresToken); break; } case tok::equal: if (!InTemplateParameterList) return true; nextToken(); break; default: return true; } } FormatTok->setFinalizedType(TT_LambdaLBrace); LSquare.setFinalizedType(TT_LambdaLSquare); NestedLambdas.push_back(Line->SeenDecltypeAuto); parseChildBlock(); assert(!NestedLambdas.empty()); NestedLambdas.pop_back(); return true; } bool UnwrappedLineParser::tryToParseLambdaIntroducer() { const FormatToken *Previous = FormatTok->Previous; const FormatToken *LeftSquare = FormatTok; nextToken(); if ((Previous && ((Previous->Tok.getIdentifierInfo() && !Previous->isOneOf(tok::kw_return, tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return)) || Previous->closesScope())) || LeftSquare->isCppStructuredBinding(IsCpp)) { return false; } if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind())) return false; if (FormatTok->is(tok::r_square)) { const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true); if (Next->is(tok::greater)) return false; } parseSquare(/*LambdaIntroducer=*/true); return true; } void UnwrappedLineParser::tryToParseJSFunction() { assert(FormatTok->is(Keywords.kw_function)); if (FormatTok->is(Keywords.kw_async)) nextToken(); // Consume "function". nextToken(); // Consume * (generator function). Treat it like C++'s overloaded operators. if (FormatTok->is(tok::star)) { FormatTok->setFinalizedType(TT_OverloadedOperator); nextToken(); } // Consume function name. if (FormatTok->is(tok::identifier)) nextToken(); if (FormatTok->isNot(tok::l_paren)) return; // Parse formal parameter list. parseParens(); if (FormatTok->is(tok::colon)) { // Parse a type definition. nextToken(); // Eat the type declaration. For braced inline object types, balance braces, // otherwise just parse until finding an l_brace for the function body. if (FormatTok->is(tok::l_brace)) tryToParseBracedList(); else while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) nextToken(); } if (FormatTok->is(tok::semi)) return; parseChildBlock(); } bool UnwrappedLineParser::tryToParseBracedList() { if (FormatTok->is(BK_Unknown)) calculateBraceTypes(); assert(FormatTok->isNot(BK_Unknown)); if (FormatTok->is(BK_Block)) return false; nextToken(); parseBracedList(); return true; } bool UnwrappedLineParser::tryToParseChildBlock() { assert(Style.isJavaScript() || Style.isCSharp()); assert(FormatTok->is(TT_FatArrow)); // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. // They always start an expression or a child block if followed by a curly // brace. 
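// Examples of inputs the check in tryToParseLambdaIntroducer above accepts
// or rejects (illustrative):
//
//   auto L = [&x](int n) { return n + x; };  // accepted
//
//   buffer[index] = 0;      // rejected: the '[' follows an identifier, so
//                           //   this is a subscript, not a capture list
//
//   [[nodiscard]] int f();  // rejected: '[' directly followed by another
//                           //   '[' is a C++ attribute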
nextToken(); if (FormatTok->isNot(tok::l_brace)) return false; parseChildBlock(); return true; } bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) { assert(!IsAngleBracket || !IsEnum); bool HasError = false; // FIXME: Once we have an expression parser in the UnwrappedLineParser, // replace this by using parseAssignmentExpression() inside. do { if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && tryToParseChildBlock()) { continue; } if (Style.isJavaScript()) { if (FormatTok->is(Keywords.kw_function)) { tryToParseJSFunction(); continue; } if (FormatTok->is(tok::l_brace)) { // Could be a method inside of a braced list `{a() { return 1; }}`. if (tryToParseBracedList()) continue; parseChildBlock(); } } if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) { if (IsEnum) { FormatTok->setBlockKind(BK_Block); if (!Style.AllowShortEnumsOnASingleLine) addUnwrappedLine(); } nextToken(); return !HasError; } switch (FormatTok->Tok.getKind()) { case tok::l_square: if (Style.isCSharp()) parseSquare(); else tryToParseLambda(); break; case tok::l_paren: parseParens(); // JavaScript can just have free standing methods and getters/setters in // object literals. Detect them by a "{" following ")". if (Style.isJavaScript()) { if (FormatTok->is(tok::l_brace)) parseChildBlock(); break; } break; case tok::l_brace: // Assume there are no blocks inside a braced init list apart // from the ones we explicitly parse out (like lambdas). FormatTok->setBlockKind(BK_BracedInit); + if (!IsAngleBracket) { + auto *Prev = FormatTok->Previous; + if (Prev && Prev->is(tok::greater)) + Prev->setFinalizedType(TT_TemplateCloser); + } nextToken(); parseBracedList(); break; case tok::less: nextToken(); if (IsAngleBracket) parseBracedList(/*IsAngleBracket=*/true); break; case tok::semi: // JavaScript (or more precisely TypeScript) can have semicolons in braced // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be // used for error recovery if we have otherwise determined that this is // a braced list. if (Style.isJavaScript()) { nextToken(); break; } HasError = true; if (!IsEnum) return false; nextToken(); break; case tok::comma: nextToken(); if (IsEnum && !Style.AllowShortEnumsOnASingleLine) addUnwrappedLine(); break; default: nextToken(); break; } } while (!eof()); return false; } /// \brief Parses a pair of parentheses (and everything between them). /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all /// double ampersands. This applies for all nested scopes as well. /// /// Returns whether there is a `=` token between the parentheses. bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { assert(FormatTok->is(tok::l_paren) && "'(' expected."); auto *LeftParen = FormatTok; bool SeenEqual = false; bool MightBeFoldExpr = false; const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); nextToken(); do { switch (FormatTok->Tok.getKind()) { case tok::l_paren: if (parseParens(AmpAmpTokenType)) SeenEqual = true; if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) parseChildBlock(); break; case tok::r_paren: { auto *Prev = LeftParen->Previous; if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody && Style.RemoveParentheses > FormatStyle::RPS_Leave) { const auto *Next = Tokens->peekNextToken(); const bool DoubleParens = Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren); const auto *PrevPrev = Prev ? 
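// The newly added check above, which finalizes a '>' immediately preceding
// a '{' inside a braced list as TT_TemplateCloser, appears to be aimed at
// nested initializers such as (illustrative input):
//
//   SomeStruct S{Container<int>{1, 2}, 3};
//
// Marking the '>' up front keeps later annotation from treating it as a
// comparison operator; the IsAngleBracket case is excluded, presumably
// because there the '>' is the list's own closer (proto-style angle lists).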
Prev->getPreviousNonComment() : nullptr; const bool Blacklisted = PrevPrev && (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || (SeenEqual && (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); const bool ReturnParens = Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement && ((NestedLambdas.empty() && !IsDecltypeAutoFunction) || (!NestedLambdas.empty() && !NestedLambdas.back())) && Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next && Next->is(tok::semi); if ((DoubleParens && !Blacklisted) || ReturnParens) { LeftParen->Optional = true; FormatTok->Optional = true; } } if (Prev) { if (Prev->is(TT_TypenameMacro)) { LeftParen->setFinalizedType(TT_TypeDeclarationParen); FormatTok->setFinalizedType(TT_TypeDeclarationParen); } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) { Prev->setFinalizedType(TT_TemplateCloser); } } nextToken(); return SeenEqual; } case tok::r_brace: // A "}" inside parenthesis is an error if there wasn't a matching "{". return SeenEqual; case tok::l_square: tryToParseLambda(); break; case tok::l_brace: if (!tryToParseBracedList()) parseChildBlock(); break; case tok::at: nextToken(); if (FormatTok->is(tok::l_brace)) { nextToken(); parseBracedList(); } break; case tok::ellipsis: MightBeFoldExpr = true; nextToken(); break; case tok::equal: SeenEqual = true; if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) tryToParseChildBlock(); else nextToken(); break; case tok::kw_class: if (Style.isJavaScript()) parseRecord(/*ParseAsExpr=*/true); else nextToken(); break; case tok::identifier: if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function))) tryToParseJSFunction(); else nextToken(); break; case tok::kw_switch: parseSwitch(/*IsExpr=*/true); break; case tok::kw_requires: { auto RequiresToken = FormatTok; nextToken(); parseRequiresExpression(RequiresToken); break; } case tok::ampamp: if (AmpAmpTokenType != TT_Unknown) FormatTok->setFinalizedType(AmpAmpTokenType); [[fallthrough]]; default: nextToken(); break; } } while (!eof()); return SeenEqual; } void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { if (!LambdaIntroducer) { assert(FormatTok->is(tok::l_square) && "'[' expected."); if (tryToParseLambda()) return; } do { switch (FormatTok->Tok.getKind()) { case tok::l_paren: parseParens(); break; case tok::r_square: nextToken(); return; case tok::r_brace: // A "}" inside parenthesis is an error if there wasn't a matching "{". 
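// Concrete cases for the parenthesis-removal logic above (illustrative
// inputs; the __attribute__/decltype and assignment-in-condition guards are
// what the Blacklisted flag protects):
//
//   x = ((a + b));        // doubled parentheses: the inner '(' and ')' are
//                         //   marked Optional
//
//   return (a + b);       // with RemoveParentheses: ReturnStatement, the
//                         //   parentheses around a plain returned value are
//                         //   marked Optional as well
//
//   if ((n = read())) {}  // kept: an '=' inside an if/while condition is
//                         //   exactly the pattern the blacklist preserves
//
// Only the Optional flag is set here; the parentheses are actually dropped
// by a later pass.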
return; case tok::l_square: parseSquare(); break; case tok::l_brace: { if (!tryToParseBracedList()) parseChildBlock(); break; } case tok::at: case tok::colon: nextToken(); if (FormatTok->is(tok::l_brace)) { nextToken(); parseBracedList(); } break; default: nextToken(); break; } } while (!eof()); } void UnwrappedLineParser::keepAncestorBraces() { if (!Style.RemoveBracesLLVM) return; const int MaxNestingLevels = 2; const int Size = NestedTooDeep.size(); if (Size >= MaxNestingLevels) NestedTooDeep[Size - MaxNestingLevels] = true; NestedTooDeep.push_back(false); } static FormatToken *getLastNonComment(const UnwrappedLine &Line) { for (const auto &Token : llvm::reverse(Line.Tokens)) if (Token.Tok->isNot(tok::comment)) return Token.Tok; return nullptr; } void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { FormatToken *Tok = nullptr; if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never ? getLastNonComment(*Line) : Line->Tokens.back().Tok; assert(Tok); if (Tok->BraceCount < 0) { assert(Tok->BraceCount == -1); Tok = nullptr; } else { Tok->BraceCount = -1; } } addUnwrappedLine(); ++Line->Level; ++Line->UnbracedBodyLevel; parseStructuralElement(); --Line->UnbracedBodyLevel; if (Tok) { assert(!Line->InPPDirective); Tok = nullptr; for (const auto &L : llvm::reverse(*CurrentLines)) { if (!L.InPPDirective && getLastNonComment(L)) { Tok = L.Tokens.back().Tok; break; } } assert(Tok); ++Tok->BraceCount; } if (CheckEOF && eof()) addUnwrappedLine(); --Line->Level; } static void markOptionalBraces(FormatToken *LeftBrace) { if (!LeftBrace) return; assert(LeftBrace->is(tok::l_brace)); FormatToken *RightBrace = LeftBrace->MatchingParen; if (!RightBrace) { assert(!LeftBrace->Optional); return; } assert(RightBrace->is(tok::r_brace)); assert(RightBrace->MatchingParen == LeftBrace); assert(LeftBrace->Optional == RightBrace->Optional); LeftBrace->Optional = true; RightBrace->Optional = true; } void UnwrappedLineParser::handleAttributes() { // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. if (FormatTok->isAttribute()) nextToken(); else if (FormatTok->is(tok::l_square)) handleCppAttributes(); } bool UnwrappedLineParser::handleCppAttributes() { // Handle [[likely]] / [[unlikely]] attributes. assert(FormatTok->is(tok::l_square)); if (!tryToParseSimpleAttribute()) return false; parseSquare(); return true; } /// Returns whether \c Tok begins a block. bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const { // FIXME: rename the function or make // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work. return Style.isVerilog() ? Keywords.isVerilogBegin(Tok) : Tok.is(tok::l_brace); } FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, bool KeepBraces, bool IsVerilogAssert) { assert((FormatTok->is(tok::kw_if) || (Style.isVerilog() && FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, Keywords.kw_assume, Keywords.kw_cover))) && "'if' expected"); nextToken(); if (IsVerilogAssert) { // Handle `assert #0` and `assert final`. if (FormatTok->is(Keywords.kw_verilogHash)) { nextToken(); if (FormatTok->is(tok::numeric_constant)) nextToken(); } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property, Keywords.kw_sequence)) { nextToken(); } } // TableGen's if statement has the form of `if then { ... }`. if (Style.isTableGen()) { while (!eof() && FormatTok->isNot(Keywords.kw_then)) { // Simply skip until then. 
This range only contains a value. nextToken(); } } // Handle `if !consteval`. if (FormatTok->is(tok::exclaim)) nextToken(); bool KeepIfBraces = true; if (FormatTok->is(tok::kw_consteval)) { nextToken(); } else { KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces; if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) nextToken(); if (FormatTok->is(tok::l_paren)) { FormatTok->setFinalizedType(TT_ConditionLParen); parseParens(); } } handleAttributes(); // The then action is optional in Verilog assert statements. if (IsVerilogAssert && FormatTok->is(tok::semi)) { nextToken(); addUnwrappedLine(); return nullptr; } bool NeedsUnwrappedLine = false; keepAncestorBraces(); FormatToken *IfLeftBrace = nullptr; IfStmtKind IfBlockKind = IfStmtKind::NotIf; if (isBlockBegin(*FormatTok)) { FormatTok->setFinalizedType(TT_ControlStatementLBrace); IfLeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind); setPreviousRBraceType(TT_ControlStatementRBrace); if (Style.BraceWrapping.BeforeElse) addUnwrappedLine(); else NeedsUnwrappedLine = true; } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) { addUnwrappedLine(); } else { parseUnbracedBody(); } if (Style.RemoveBracesLLVM) { assert(!NestedTooDeep.empty()); KeepIfBraces = KeepIfBraces || (IfLeftBrace && !IfLeftBrace->MatchingParen) || NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || IfBlockKind == IfStmtKind::IfElseIf; } bool KeepElseBraces = KeepIfBraces; FormatToken *ElseLeftBrace = nullptr; IfStmtKind Kind = IfStmtKind::IfOnly; if (FormatTok->is(tok::kw_else)) { if (Style.RemoveBracesLLVM) { NestedTooDeep.back() = false; Kind = IfStmtKind::IfElse; } nextToken(); handleAttributes(); if (isBlockBegin(*FormatTok)) { const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if); FormatTok->setFinalizedType(TT_ElseLBrace); ElseLeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); IfStmtKind ElseBlockKind = IfStmtKind::NotIf; FormatToken *IfLBrace = parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind); setPreviousRBraceType(TT_ElseRBrace); if (FormatTok->is(tok::kw_else)) { KeepElseBraces = KeepElseBraces || ElseBlockKind == IfStmtKind::IfOnly || ElseBlockKind == IfStmtKind::IfElseIf; } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) { KeepElseBraces = true; assert(ElseLeftBrace->MatchingParen); markOptionalBraces(ElseLeftBrace); } addUnwrappedLine(); } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) { const FormatToken *Previous = Tokens->getPreviousToken(); assert(Previous); const bool IsPrecededByComment = Previous->is(tok::comment); if (IsPrecededByComment) { addUnwrappedLine(); ++Line->Level; } bool TooDeep = true; if (Style.RemoveBracesLLVM) { Kind = IfStmtKind::IfElseIf; TooDeep = NestedTooDeep.pop_back_val(); } ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); if (Style.RemoveBracesLLVM) NestedTooDeep.push_back(TooDeep); if (IsPrecededByComment) --Line->Level; } else { parseUnbracedBody(/*CheckEOF=*/true); } } else { KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; if (NeedsUnwrappedLine) addUnwrappedLine(); } if (!Style.RemoveBracesLLVM) return nullptr; assert(!NestedTooDeep.empty()); KeepElseBraces = KeepElseBraces || (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back(); NestedTooDeep.pop_back(); if (!KeepIfBraces && !KeepElseBraces) 
{ markOptionalBraces(IfLeftBrace); markOptionalBraces(ElseLeftBrace); } else if (IfLeftBrace) { FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; if (IfRightBrace) { assert(IfRightBrace->MatchingParen == IfLeftBrace); assert(!IfLeftBrace->Optional); assert(!IfRightBrace->Optional); IfLeftBrace->MatchingParen = nullptr; IfRightBrace->MatchingParen = nullptr; } } if (IfKind) *IfKind = Kind; return IfLeftBrace; } void UnwrappedLineParser::parseTryCatch() { assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); nextToken(); bool NeedsUnwrappedLine = false; bool HasCtorInitializer = false; if (FormatTok->is(tok::colon)) { auto *Colon = FormatTok; // We are in a function try block, what comes is an initializer list. nextToken(); if (FormatTok->is(tok::identifier)) { HasCtorInitializer = true; Colon->setFinalizedType(TT_CtorInitializerColon); } // In case identifiers were removed by clang-tidy, what might follow is // multiple commas in sequence - before the first identifier. while (FormatTok->is(tok::comma)) nextToken(); while (FormatTok->is(tok::identifier)) { nextToken(); if (FormatTok->is(tok::l_paren)) { parseParens(); } else if (FormatTok->is(tok::l_brace)) { nextToken(); parseBracedList(); } // In case identifiers were removed by clang-tidy, what might follow is // multiple commas in sequence - after the first identifier. while (FormatTok->is(tok::comma)) nextToken(); } } // Parse try with resource. if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) parseParens(); keepAncestorBraces(); if (FormatTok->is(tok::l_brace)) { if (HasCtorInitializer) FormatTok->setFinalizedType(TT_FunctionLBrace); CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(); if (Style.BraceWrapping.BeforeCatch) addUnwrappedLine(); else NeedsUnwrappedLine = true; } else if (FormatTok->isNot(tok::kw_catch)) { // The C++ standard requires a compound-statement after a try. // If there's none, we try to assume there's a structuralElement // and try to continue. 
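// Editorial illustration, not part of the upstream change: the kind of
// statement parseIfThenElse() above marks up when RemoveBracesLLVM is on.
// Both brace pairs wrap a single statement and the nesting is shallow, so
// both are marked Optional; the identifiers are invented.
//
//   if (isValid(entry)) {
//     process(entry);
//   } else {
//     reject(entry);
//   }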
addUnwrappedLine(); ++Line->Level; parseStructuralElement(); --Line->Level; } while (true) { if (FormatTok->is(tok::at)) nextToken(); if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, tok::kw___finally) || ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && FormatTok->is(Keywords.kw_finally)) || (FormatTok->isObjCAtKeyword(tok::objc_catch) || FormatTok->isObjCAtKeyword(tok::objc_finally)))) { break; } nextToken(); while (FormatTok->isNot(tok::l_brace)) { if (FormatTok->is(tok::l_paren)) { parseParens(); continue; } if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) { if (Style.RemoveBracesLLVM) NestedTooDeep.pop_back(); return; } nextToken(); } NeedsUnwrappedLine = false; Line->MustBeDeclaration = false; CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(); if (Style.BraceWrapping.BeforeCatch) addUnwrappedLine(); else NeedsUnwrappedLine = true; } if (Style.RemoveBracesLLVM) NestedTooDeep.pop_back(); if (NeedsUnwrappedLine) addUnwrappedLine(); } void UnwrappedLineParser::parseNamespace() { assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && "'namespace' expected"); const FormatToken &InitialToken = *FormatTok; nextToken(); if (InitialToken.is(TT_NamespaceMacro)) { parseParens(); } else { while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, tok::l_square, tok::period, tok::l_paren) || (Style.isCSharp() && FormatTok->is(tok::kw_union))) { if (FormatTok->is(tok::l_square)) parseSquare(); else if (FormatTok->is(tok::l_paren)) parseParens(); else nextToken(); } } if (FormatTok->is(tok::l_brace)) { FormatTok->setFinalizedType(TT_NamespaceLBrace); if (ShouldBreakBeforeBrace(Style, InitialToken)) addUnwrappedLine(); unsigned AddLevels = Style.NamespaceIndentation == FormatStyle::NI_All || (Style.NamespaceIndentation == FormatStyle::NI_Inner && DeclarationScopeStack.size() > 1) ? 1u : 0u; bool ManageWhitesmithsBraces = AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; // If we're in Whitesmiths mode, indent the brace if we're not indenting // the whole block. if (ManageWhitesmithsBraces) ++Line->Level; // Munch the semicolon after a namespace. This is more common than one would // think. Putting the semicolon into its own line is very ugly. parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr, ManageWhitesmithsBraces); addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); if (ManageWhitesmithsBraces) --Line->Level; } // FIXME: Add error handling. } void UnwrappedLineParser::parseNew() { assert(FormatTok->is(tok::kw_new) && "'new' expected"); nextToken(); if (Style.isCSharp()) { do { // Handle constructor invocation, e.g. `new(field: value)`. if (FormatTok->is(tok::l_paren)) parseParens(); // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`. if (FormatTok->is(tok::l_brace)) parseBracedList(); if (FormatTok->isOneOf(tok::semi, tok::comma)) return; nextToken(); } while (!eof()); } if (Style.Language != FormatStyle::LK_Java) return; // In Java, we can parse everything up to the parens, which aren't optional. do { // There should not be a ;, { or } before the new's open paren. if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) return; // Consume the parens. if (FormatTok->is(tok::l_paren)) { parseParens(); // If there is a class body of an anonymous class, consume that as child. 
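// Editorial illustration, not part of the upstream change: a function try
// block of the shape parseTryCatch() above handles, where the ':' after 'try'
// introduces a constructor initializer list. Widget, size_, data_ and
// allocate are invented names.
//
//   Widget::Widget(int n) try : size_(n), data_(allocate(n)) {
//     validate();
//   } catch (const std::bad_alloc &) {
//     throw; // a ctor function-try-block must not swallow the exception
//   }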
if (FormatTok->is(tok::l_brace)) parseChildBlock(); return; } nextToken(); } while (!eof()); } void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) { keepAncestorBraces(); if (isBlockBegin(*FormatTok)) { FormatTok->setFinalizedType(TT_ControlStatementLBrace); FormatToken *LeftBrace = FormatTok; CompoundStatementIndenter Indenter(this, Style, Line->Level); parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, /*MunchSemi=*/true, KeepBraces); setPreviousRBraceType(TT_ControlStatementRBrace); if (!KeepBraces) { assert(!NestedTooDeep.empty()); if (!NestedTooDeep.back()) markOptionalBraces(LeftBrace); } if (WrapRightBrace) addUnwrappedLine(); } else { parseUnbracedBody(); } if (!KeepBraces) NestedTooDeep.pop_back(); } void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) { assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) || (Style.isVerilog() && FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb, Keywords.kw_always_ff, Keywords.kw_always_latch, Keywords.kw_final, Keywords.kw_initial, Keywords.kw_foreach, Keywords.kw_forever, Keywords.kw_repeat))) && "'for', 'while' or foreach macro expected"); const bool KeepBraces = !Style.RemoveBracesLLVM || !FormatTok->isOneOf(tok::kw_for, tok::kw_while); nextToken(); // JS' for await ( ... if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) nextToken(); if (IsCpp && FormatTok->is(tok::kw_co_await)) nextToken(); if (HasParens && FormatTok->is(tok::l_paren)) { // The type is only set for Verilog basically because we were afraid to // change the existing behavior for loops. See the discussion on D121756 for // details. if (Style.isVerilog()) FormatTok->setFinalizedType(TT_ConditionLParen); parseParens(); } if (Style.isVerilog()) { // Event control. parseVerilogSensitivityList(); } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) && Tokens->getPreviousToken()->is(tok::r_paren)) { nextToken(); addUnwrappedLine(); return; } handleAttributes(); parseLoopBody(KeepBraces, /*WrapRightBrace=*/true); } void UnwrappedLineParser::parseDoWhile() { assert(FormatTok->is(tok::kw_do) && "'do' expected"); nextToken(); parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile); // FIXME: Add error handling. if (FormatTok->isNot(tok::kw_while)) { addUnwrappedLine(); return; } FormatTok->setFinalizedType(TT_DoWhile); // If in Whitesmiths mode, the line with the while() needs to be indented // to the same level as the block. 
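// Editorial illustration, not part of the upstream change: loop shapes the
// routines above distinguish, assuming RemoveBracesLLVM and
// AllowShortLoopsOnASingleLine are enabled. advance/consume/skip are invented.
//
//   while (advance(it)) {      // single-statement body: braces marked Optional
//     consume(*it);
//   }
//   for (int i = 0; i < n; ++i)
//     total += values[i];      // already unbraced: goes through parseUnbracedBody()
//   while (skip(cursor));      // null body: the short-loop early return above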
if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) ++Line->Level; nextToken(); parseStructuralElement(); } void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { nextToken(); unsigned OldLineLevel = Line->Level; if (LeftAlignLabel) Line->Level = 0; else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) --Line->Level; if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && FormatTok->is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Line->Level, Style.BraceWrapping.AfterCaseLabel, Style.BraceWrapping.IndentBraces); parseBlock(); if (FormatTok->is(tok::kw_break)) { if (Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Always) { addUnwrappedLine(); if (!Style.IndentCaseBlocks && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { ++Line->Level; } } parseStructuralElement(); } addUnwrappedLine(); } else { if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); } Line->Level = OldLineLevel; if (FormatTok->isNot(tok::l_brace)) { parseStructuralElement(); addUnwrappedLine(); } } void UnwrappedLineParser::parseCaseLabel() { assert(FormatTok->is(tok::kw_case) && "'case' expected"); auto *Case = FormatTok; // FIXME: fix handling of complex expressions here. do { nextToken(); if (FormatTok->is(tok::colon)) { FormatTok->setFinalizedType(TT_CaseLabelColon); break; } if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) { FormatTok->setFinalizedType(TT_CaseLabelArrow); Case->setFinalizedType(TT_SwitchExpressionLabel); break; } } while (!eof()); parseLabel(); } void UnwrappedLineParser::parseSwitch(bool IsExpr) { assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); nextToken(); if (FormatTok->is(tok::l_paren)) parseParens(); keepAncestorBraces(); if (FormatTok->is(tok::l_brace)) { CompoundStatementIndenter Indenter(this, Style, Line->Level); FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace : TT_ControlStatementLBrace); if (IsExpr) parseChildBlock(); else parseBlock(); setPreviousRBraceType(TT_ControlStatementRBrace); if (!IsExpr) addUnwrappedLine(); } else { addUnwrappedLine(); ++Line->Level; parseStructuralElement(); --Line->Level; } if (Style.RemoveBracesLLVM) NestedTooDeep.pop_back(); } // Operators that can follow a C variable. static bool isCOperatorFollowingVar(tok::TokenKind Kind) { switch (Kind) { case tok::ampamp: case tok::ampequal: case tok::arrow: case tok::caret: case tok::caretequal: case tok::comma: case tok::ellipsis: case tok::equal: case tok::equalequal: case tok::exclaim: case tok::exclaimequal: case tok::greater: case tok::greaterequal: case tok::greatergreater: case tok::greatergreaterequal: case tok::l_paren: case tok::l_square: case tok::less: case tok::lessequal: case tok::lessless: case tok::lesslessequal: case tok::minus: case tok::minusequal: case tok::minusminus: case tok::percent: case tok::percentequal: case tok::period: case tok::pipe: case tok::pipeequal: case tok::pipepipe: case tok::plus: case tok::plusequal: case tok::plusplus: case tok::question: case tok::r_brace: case tok::r_paren: case tok::r_square: case tok::semi: case tok::slash: case tok::slashequal: case tok::star: case tok::starequal: return true; default: return false; } } void UnwrappedLineParser::parseAccessSpecifier() { FormatToken *AccessSpecifierCandidate = FormatTok; nextToken(); // Understand Qt's slots. if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) nextToken(); // Otherwise, we don't know what it is, and we'd better keep the next token. 
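// Editorial illustration, not part of the upstream change: a switch whose case
// label is followed by a block, the shape parseCaseLabel() -> parseLabel()
// above indents (and, with BraceWrapping.AfterCaseLabel, wraps). The enum and
// the called functions are invented.
//
//   switch (kind) {
//   case Kind::Open: {
//     open();
//     break;
//   }
//   default:
//     close();
//   }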
if (FormatTok->is(tok::colon)) { nextToken(); addUnwrappedLine(); } else if (FormatTok->isNot(tok::coloncolon) && !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { // Not a variable name nor namespace name. addUnwrappedLine(); } else if (AccessSpecifierCandidate) { // Consider the access specifier to be a C identifier. AccessSpecifierCandidate->Tok.setKind(tok::identifier); } } /// \brief Parses a requires, decides if it is a clause or an expression. /// \pre The current token has to be the requires keyword. /// \returns true if it parsed a clause. bool UnwrappedLineParser::parseRequires() { assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); auto RequiresToken = FormatTok; // We try to guess if it is a requires clause, or a requires expression. For // that we first consume the keyword and check the next token. nextToken(); switch (FormatTok->Tok.getKind()) { case tok::l_brace: // This can only be an expression, never a clause. parseRequiresExpression(RequiresToken); return false; case tok::l_paren: // Clauses and expression can start with a paren, it's unclear what we have. break; default: // All other tokens can only be a clause. parseRequiresClause(RequiresToken); return true; } // Looking forward we would have to decide if there are function declaration // like arguments to the requires expression: // requires (T t) { // Or there is a constraint expression for the requires clause: // requires (C && ... // But first let's look behind. auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); if (!PreviousNonComment || PreviousNonComment->is(TT_RequiresExpressionLBrace)) { // If there is no token, or an expression left brace, we are a requires // clause within a requires expression. parseRequiresClause(RequiresToken); return true; } switch (PreviousNonComment->Tok.getKind()) { case tok::greater: case tok::r_paren: case tok::kw_noexcept: case tok::kw_const: // This is a requires clause. parseRequiresClause(RequiresToken); return true; case tok::amp: case tok::ampamp: { // This can be either: // if (... && requires (T t) ...) // Or // void member(...) && requires (C ... // We check the one token before that for a const: // void member(...) const && requires (C ... auto PrevPrev = PreviousNonComment->getPreviousNonComment(); if (PrevPrev && PrevPrev->is(tok::kw_const)) { parseRequiresClause(RequiresToken); return true; } break; } default: if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) { // This is a requires clause. parseRequiresClause(RequiresToken); return true; } // It's an expression. parseRequiresExpression(RequiresToken); return false; } // Now we look forward and try to check if the paren content is a parameter // list. The parameters can be cv-qualified and contain references or // pointers. // So we want basically to check for TYPE NAME, but TYPE can contain all kinds // of stuff: typename, const, *, &, &&, ::, identifiers. unsigned StoredPosition = Tokens->getPosition(); FormatToken *NextToken = Tokens->getNextToken(); int Lookahead = 0; auto PeekNext = [&Lookahead, &NextToken, this] { ++Lookahead; NextToken = Tokens->getNextToken(); }; bool FoundType = false; bool LastWasColonColon = false; int OpenAngles = 0; for (; Lookahead < 50; PeekNext()) { switch (NextToken->Tok.getKind()) { case tok::kw_volatile: case tok::kw_const: case tok::comma: if (OpenAngles == 0) { FormatTok = Tokens->setPosition(StoredPosition); parseRequiresExpression(RequiresToken); return false; } break; case tok::eof: // Break out of the loop. 
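// Editorial illustration, not part of the upstream change: the two C++20 forms
// parseRequires() above has to tell apart. A requires *clause* carries a
// constraint expression, while a requires *expression* takes an optional
// parameter list and a braced body; in the first declaration the clause's
// constraint is itself a requires expression.
//
//   template <typename T>
//     requires requires(T t) { t.clear(); }     // clause whose constraint is an expression
//   void reset(T &t);
//
//   template <typename T>
//     requires std::is_trivially_copyable_v<T>  // plain clause
//   void pack(const T &t);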
Lookahead = 50; break; case tok::coloncolon: LastWasColonColon = true; break; case tok::kw_decltype: case tok::identifier: if (FoundType && !LastWasColonColon && OpenAngles == 0) { FormatTok = Tokens->setPosition(StoredPosition); parseRequiresExpression(RequiresToken); return false; } FoundType = true; LastWasColonColon = false; break; case tok::less: ++OpenAngles; break; case tok::greater: --OpenAngles; break; default: if (NextToken->isTypeName(LangOpts)) { FormatTok = Tokens->setPosition(StoredPosition); parseRequiresExpression(RequiresToken); return false; } break; } } // This seems to be a complicated expression, just assume it's a clause. FormatTok = Tokens->setPosition(StoredPosition); parseRequiresClause(RequiresToken); return true; } /// \brief Parses a requires clause. /// \param RequiresToken The requires keyword token, which starts this clause. /// \pre We need to be on the next token after the requires keyword. /// \sa parseRequiresExpression /// /// Returns if it either has finished parsing the clause, or it detects, that /// the clause is incorrect. void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { assert(FormatTok->getPreviousNonComment() == RequiresToken); assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); // If there is no previous token, we are within a requires expression, // otherwise we will always have the template or function declaration in front // of it. bool InRequiresExpression = !RequiresToken->Previous || RequiresToken->Previous->is(TT_RequiresExpressionLBrace); RequiresToken->setFinalizedType(InRequiresExpression ? TT_RequiresClauseInARequiresExpression : TT_RequiresClause); // NOTE: parseConstraintExpression is only ever called from this function. // It could be inlined into here. parseConstraintExpression(); if (!InRequiresExpression) FormatTok->Previous->ClosesRequiresClause = true; } /// \brief Parses a requires expression. /// \param RequiresToken The requires keyword token, which starts this clause. /// \pre We need to be on the next token after the requires keyword. /// \sa parseRequiresClause /// /// Returns if it either has finished parsing the expression, or it detects, /// that the expression is incorrect. void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { assert(FormatTok->getPreviousNonComment() == RequiresToken); assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); RequiresToken->setFinalizedType(TT_RequiresExpression); if (FormatTok->is(tok::l_paren)) { FormatTok->setFinalizedType(TT_RequiresExpressionLParen); parseParens(); } if (FormatTok->is(tok::l_brace)) { FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); parseChildBlock(); } } /// \brief Parses a constraint expression. /// /// This is the body of a requires clause. It returns, when the parsing is /// complete, or the expression is incorrect. void UnwrappedLineParser::parseConstraintExpression() { // The special handling for lambdas is needed since tryToParseLambda() eats a // token and if a requires expression is the last part of a requires clause // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is // not set on the correct token. Thus we need to be aware if we even expect a // lambda to be possible. // template requires requires { ... 
} [[nodiscard]] ...; bool LambdaNextTimeAllowed = true; // Within lambda declarations, it is permitted to put a requires clause after // its template parameter list, which would place the requires clause right // before the parentheses of the parameters of the lambda declaration. Thus, // we track if we expect to see grouping parentheses at all. // Without this check, `requires foo (T t)` in the below example would be // seen as the whole requires clause, accidentally eating the parameters of // the lambda. // [&] requires foo (T t) { ... }; bool TopLevelParensAllowed = true; do { bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); switch (FormatTok->Tok.getKind()) { case tok::kw_requires: { auto RequiresToken = FormatTok; nextToken(); parseRequiresExpression(RequiresToken); break; } case tok::l_paren: if (!TopLevelParensAllowed) return; parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); TopLevelParensAllowed = false; break; case tok::l_square: if (!LambdaThisTimeAllowed || !tryToParseLambda()) return; break; case tok::kw_const: case tok::semi: case tok::kw_class: case tok::kw_struct: case tok::kw_union: return; case tok::l_brace: // Potential function body. return; case tok::ampamp: case tok::pipepipe: FormatTok->setFinalizedType(TT_BinaryOperator); nextToken(); LambdaNextTimeAllowed = true; TopLevelParensAllowed = true; break; case tok::comma: case tok::comment: LambdaNextTimeAllowed = LambdaThisTimeAllowed; nextToken(); break; case tok::kw_sizeof: case tok::greater: case tok::greaterequal: case tok::greatergreater: case tok::less: case tok::lessequal: case tok::lessless: case tok::equalequal: case tok::exclaim: case tok::exclaimequal: case tok::plus: case tok::minus: case tok::star: case tok::slash: LambdaNextTimeAllowed = true; TopLevelParensAllowed = true; // Just eat them. nextToken(); break; case tok::numeric_constant: case tok::coloncolon: case tok::kw_true: case tok::kw_false: TopLevelParensAllowed = false; // Just eat them. nextToken(); break; case tok::kw_static_cast: case tok::kw_const_cast: case tok::kw_reinterpret_cast: case tok::kw_dynamic_cast: nextToken(); if (FormatTok->isNot(tok::less)) return; nextToken(); parseBracedList(/*IsAngleBracket=*/true); break; default: if (!FormatTok->Tok.getIdentifierInfo()) { // Identifiers are part of the default case, we check for more then // tok::identifier to handle builtin type traits. return; } // We need to differentiate identifiers for a template deduction guide, // variables, or function return types (the constraint expression has // ended before that), and basically all other cases. But it's easier to // check the other way around. assert(FormatTok->Previous); switch (FormatTok->Previous->Tok.getKind()) { case tok::coloncolon: // Nested identifier. case tok::ampamp: // Start of a function or variable for the case tok::pipepipe: // constraint expression. (binary) case tok::exclaim: // The same as above, but unary. case tok::kw_requires: // Initial identifier of a requires clause. case tok::equal: // Initial identifier of a concept declaration. break; default: return; } // Read identifier with optional template declaration. nextToken(); if (FormatTok->is(tok::less)) { nextToken(); parseBracedList(/*IsAngleBracket=*/true); } TopLevelParensAllowed = false; break; } } while (!eof()); } bool UnwrappedLineParser::parseEnum() { const FormatToken &InitialToken = *FormatTok; // Won't be 'enum' for NS_ENUMs. if (FormatTok->is(tok::kw_enum)) nextToken(); // In TypeScript, "enum" can also be used as property name, e.g. 
in interface // declarations. An "enum" keyword followed by a colon would be a syntax // error and thus assume it is just an identifier. if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) return false; // In protobuf, "enum" can be used as a field name. if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) return false; if (IsCpp) { // Eat up enum class ... if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) nextToken(); while (FormatTok->is(tok::l_square)) if (!handleCppAttributes()) return false; } while (FormatTok->Tok.getIdentifierInfo() || FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, tok::greater, tok::comma, tok::question, tok::l_square)) { if (Style.isVerilog()) { FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName); nextToken(); // In Verilog the base type can have dimensions. while (FormatTok->is(tok::l_square)) parseSquare(); } else { nextToken(); } // We can have macros or attributes in between 'enum' and the enum name. if (FormatTok->is(tok::l_paren)) parseParens(); if (FormatTok->is(tok::identifier)) { nextToken(); // If there are two identifiers in a row, this is likely an elaborate // return type. In Java, this can be "implements", etc. if (IsCpp && FormatTok->is(tok::identifier)) return false; } } // Just a declaration or something is wrong. if (FormatTok->isNot(tok::l_brace)) return true; FormatTok->setFinalizedType(TT_EnumLBrace); FormatTok->setBlockKind(BK_Block); if (Style.Language == FormatStyle::LK_Java) { // Java enums are different. parseJavaEnumBody(); return true; } if (Style.Language == FormatStyle::LK_Proto) { parseBlock(/*MustBeDeclaration=*/true); return true; } if (!Style.AllowShortEnumsOnASingleLine && ShouldBreakBeforeBrace(Style, InitialToken)) { addUnwrappedLine(); } // Parse enum body. nextToken(); if (!Style.AllowShortEnumsOnASingleLine) { addUnwrappedLine(); Line->Level += 1; } bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true); if (!Style.AllowShortEnumsOnASingleLine) Line->Level -= 1; if (HasError) { if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); } setPreviousRBraceType(TT_EnumRBrace); return true; // There is no addUnwrappedLine() here so that we fall through to parsing a // structural element afterwards. Thus, in "enum A {} n, m;", // "} n, m;" will end up in one unwrapped line. } bool UnwrappedLineParser::parseStructLike() { // parseRecord falls through and does not yet add an unwrapped line as a // record declaration or definition can start a structural element. parseRecord(); // This does not apply to Java, JavaScript and C#. if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || Style.isCSharp()) { if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); return true; } return false; } namespace { // A class used to set and restore the Token position when peeking // ahead in the token source. class ScopedTokenPosition { unsigned StoredPosition; FormatTokenSource *Tokens; public: ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { assert(Tokens && "Tokens expected to not be null"); StoredPosition = Tokens->getPosition(); } ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } }; } // namespace // Look to see if we have [[ by looking ahead, if // its not then rewind to the original position. bool UnwrappedLineParser::tryToParseSimpleAttribute() { ScopedTokenPosition AutoPosition(Tokens); FormatToken *Tok = Tokens->getNextToken(); // We already read the first [ check for the second. 
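// Editorial illustration, not part of the upstream change: enum headers of the
// kind the loop above consumes before reaching '{', plus the trailing
// declarator case mentioned at the end of parseEnum(). Names are invented.
//
//   enum class [[nodiscard]] Status : std::uint8_t { Ok, Failed };
//   enum Flags { F1 = 1, F2 = 2 } active, pending; // "} active, pending;"
//                                                  // stays on one unwrapped line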
if (Tok->isNot(tok::l_square)) return false; // Double check that the attribute is just something // fairly simple. while (Tok->isNot(tok::eof)) { if (Tok->is(tok::r_square)) break; Tok = Tokens->getNextToken(); } if (Tok->is(tok::eof)) return false; Tok = Tokens->getNextToken(); if (Tok->isNot(tok::r_square)) return false; Tok = Tokens->getNextToken(); if (Tok->is(tok::semi)) return false; return true; } void UnwrappedLineParser::parseJavaEnumBody() { assert(FormatTok->is(tok::l_brace)); const FormatToken *OpeningBrace = FormatTok; // Determine whether the enum is simple, i.e. does not have a semicolon or // constants with class bodies. Simple enums can be formatted like braced // lists, contracted to a single line, etc. unsigned StoredPosition = Tokens->getPosition(); bool IsSimple = true; FormatToken *Tok = Tokens->getNextToken(); while (Tok->isNot(tok::eof)) { if (Tok->is(tok::r_brace)) break; if (Tok->isOneOf(tok::l_brace, tok::semi)) { IsSimple = false; break; } // FIXME: This will also mark enums with braces in the arguments to enum // constants as "not simple". This is probably fine in practice, though. Tok = Tokens->getNextToken(); } FormatTok = Tokens->setPosition(StoredPosition); if (IsSimple) { nextToken(); parseBracedList(); addUnwrappedLine(); return; } // Parse the body of a more complex enum. // First add a line for everything up to the "{". nextToken(); addUnwrappedLine(); ++Line->Level; // Parse the enum constants. while (!eof()) { if (FormatTok->is(tok::l_brace)) { // Parse the constant's class body. parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, /*MunchSemi=*/false); } else if (FormatTok->is(tok::l_paren)) { parseParens(); } else if (FormatTok->is(tok::comma)) { nextToken(); addUnwrappedLine(); } else if (FormatTok->is(tok::semi)) { nextToken(); addUnwrappedLine(); break; } else if (FormatTok->is(tok::r_brace)) { addUnwrappedLine(); break; } else { nextToken(); } } // Parse the class body after the enum's ";" if any. parseLevel(OpeningBrace); nextToken(); --Line->Level; addUnwrappedLine(); } void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { const FormatToken &InitialToken = *FormatTok; nextToken(); const FormatToken *ClassName = nullptr; bool IsDerived = false; auto IsNonMacroIdentifier = [](const FormatToken *Tok) { return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper(); }; // JavaScript/TypeScript supports anonymous classes like: // a = class extends foo { } bool JSPastExtendsOrImplements = false; // The actual identifier can be a nested name specifier, and in macros // it is often token-pasted. // An [[attribute]] can be before the identifier. while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, tok::kw_alignas, tok::l_square) || FormatTok->isAttribute() || ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && FormatTok->isOneOf(tok::period, tok::comma))) { if (Style.isJavaScript() && FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { JSPastExtendsOrImplements = true; // JavaScript/TypeScript supports inline object types in // extends/implements positions: // class Foo implements {bar: number} { } nextToken(); if (FormatTok->is(tok::l_brace)) { tryToParseBracedList(); continue; } } if (FormatTok->is(tok::l_square) && handleCppAttributes()) continue; const auto *Previous = FormatTok; nextToken(); switch (FormatTok->Tok.getKind()) { case tok::l_paren: // We can have macros in between 'class' and the class name. if (!IsNonMacroIdentifier(Previous) || // e.g. 
`struct macro(a) S { int i; };` Previous->Previous == &InitialToken) { parseParens(); } break; case tok::coloncolon: case tok::hashhash: break; default: if (!JSPastExtendsOrImplements && !ClassName && Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) { ClassName = Previous; } } } auto IsListInitialization = [&] { if (!ClassName || IsDerived) return false; assert(FormatTok->is(tok::l_brace)); const auto *Prev = FormatTok->getPreviousNonComment(); assert(Prev); return Prev != ClassName && Prev->is(tok::identifier) && Prev->isNot(Keywords.kw_final) && tryToParseBracedList(); }; if (FormatTok->isOneOf(tok::colon, tok::less)) { int AngleNestingLevel = 0; do { if (FormatTok->is(tok::less)) ++AngleNestingLevel; else if (FormatTok->is(tok::greater)) --AngleNestingLevel; if (AngleNestingLevel == 0) { if (FormatTok->is(tok::colon)) { IsDerived = true; } else if (FormatTok->is(tok::identifier) && FormatTok->Previous->is(tok::coloncolon)) { ClassName = FormatTok; } else if (FormatTok->is(tok::l_paren) && IsNonMacroIdentifier(FormatTok->Previous)) { break; } } if (FormatTok->is(tok::l_brace)) { if (AngleNestingLevel == 0 && IsListInitialization()) return; calculateBraceTypes(/*ExpectClassBody=*/true); if (!tryToParseBracedList()) break; } if (FormatTok->is(tok::l_square)) { FormatToken *Previous = FormatTok->Previous; if (!Previous || (Previous->isNot(tok::r_paren) && !Previous->isTypeOrIdentifier(LangOpts))) { // Don't try parsing a lambda if we had a closing parenthesis before, // it was probably a pointer to an array: int (*)[]. if (!tryToParseLambda()) continue; } else { parseSquare(); continue; } } if (FormatTok->is(tok::semi)) return; if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { addUnwrappedLine(); nextToken(); parseCSharpGenericTypeConstraint(); break; } nextToken(); } while (!eof()); } auto GetBraceTypes = [](const FormatToken &RecordTok) -> std::pair { switch (RecordTok.Tok.getKind()) { case tok::kw_class: return {TT_ClassLBrace, TT_ClassRBrace}; case tok::kw_struct: return {TT_StructLBrace, TT_StructRBrace}; case tok::kw_union: return {TT_UnionLBrace, TT_UnionRBrace}; default: // Useful for e.g. interface. return {TT_RecordLBrace, TT_RecordRBrace}; } }; if (FormatTok->is(tok::l_brace)) { if (IsListInitialization()) return; auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken); FormatTok->setFinalizedType(OpenBraceType); if (ParseAsExpr) { parseChildBlock(); } else { if (ShouldBreakBeforeBrace(Style, InitialToken)) addUnwrappedLine(); unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); } setPreviousRBraceType(ClosingBraceType); } // There is no addUnwrappedLine() here so that we fall through to parsing a // structural element afterwards. Thus, in "class A {} n, m;", // "} n, m;" will end up in one unwrapped line. } void UnwrappedLineParser::parseObjCMethod() { assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && "'(' or identifier expected."); do { if (FormatTok->is(tok::semi)) { nextToken(); addUnwrappedLine(); return; } else if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterFunction) addUnwrappedLine(); parseBlock(); addUnwrappedLine(); return; } else { nextToken(); } } while (!eof()); } void UnwrappedLineParser::parseObjCProtocolList() { assert(FormatTok->is(tok::less) && "'<' expected."); do { nextToken(); // Early exit in case someone forgot a close angle. 
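// Editorial illustration, not part of the upstream change: record headers of
// the kind the loop in parseRecord() above walks before the '{'. EXPORT_API is
// an invented macro; its all-caps spelling is what the IsNonMacroIdentifier
// check above keys on.
//
//   class EXPORT_API Matrix final : public Storage<double>, private Tag {
//     int rows, cols;
//   };
//   struct alignas(16) Vec4 { float v[4]; };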
if (FormatTok->isOneOf(tok::semi, tok::l_brace) || FormatTok->isObjCAtKeyword(tok::objc_end)) { return; } } while (!eof() && FormatTok->isNot(tok::greater)); nextToken(); // Skip '>'. } void UnwrappedLineParser::parseObjCUntilAtEnd() { do { if (FormatTok->isObjCAtKeyword(tok::objc_end)) { nextToken(); addUnwrappedLine(); break; } if (FormatTok->is(tok::l_brace)) { parseBlock(); // In ObjC interfaces, nothing should be following the "}". addUnwrappedLine(); } else if (FormatTok->is(tok::r_brace)) { // Ignore stray "}". parseStructuralElement doesn't consume them. nextToken(); addUnwrappedLine(); } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { nextToken(); parseObjCMethod(); } else { parseStructuralElement(); } } while (!eof()); } void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); nextToken(); nextToken(); // interface name // @interface can be followed by a lightweight generic // specialization list, then either a base class or a category. if (FormatTok->is(tok::less)) parseObjCLightweightGenerics(); if (FormatTok->is(tok::colon)) { nextToken(); nextToken(); // base class name // The base class can also have lightweight generics applied to it. if (FormatTok->is(tok::less)) parseObjCLightweightGenerics(); } else if (FormatTok->is(tok::l_paren)) { // Skip category, if present. parseParens(); } if (FormatTok->is(tok::less)) parseObjCProtocolList(); if (FormatTok->is(tok::l_brace)) { if (Style.BraceWrapping.AfterObjCDeclaration) addUnwrappedLine(); parseBlock(/*MustBeDeclaration=*/true); } // With instance variables, this puts '}' on its own line. Without instance // variables, this ends the @interface line. addUnwrappedLine(); parseObjCUntilAtEnd(); } void UnwrappedLineParser::parseObjCLightweightGenerics() { assert(FormatTok->is(tok::less)); // Unlike protocol lists, generic parameterizations support // nested angles: // // @interface Foo> : // NSObject // // so we need to count how many open angles we have left. unsigned NumOpenAngles = 1; do { nextToken(); // Early exit in case someone forgot a close angle. if (FormatTok->isOneOf(tok::semi, tok::l_brace) || FormatTok->isObjCAtKeyword(tok::objc_end)) { break; } if (FormatTok->is(tok::less)) { ++NumOpenAngles; } else if (FormatTok->is(tok::greater)) { assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); --NumOpenAngles; } } while (!eof() && NumOpenAngles != 0); nextToken(); // Skip '>'. } // Returns true for the declaration/definition form of @protocol, // false for the expression form. bool UnwrappedLineParser::parseObjCProtocol() { assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); nextToken(); if (FormatTok->is(tok::l_paren)) { // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". return false; } // The definition/declaration form, // @protocol Foo // - (int)someMethod; // @end nextToken(); // protocol name if (FormatTok->is(tok::less)) parseObjCProtocolList(); // Check for protocol declaration. if (FormatTok->is(tok::semi)) { nextToken(); addUnwrappedLine(); return true; } addUnwrappedLine(); parseObjCUntilAtEnd(); return true; } void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { bool IsImport = FormatTok->is(Keywords.kw_import); assert(IsImport || FormatTok->is(tok::kw_export)); nextToken(); // Consume the "default" in "export default class/function". 
if (FormatTok->is(tok::kw_default)) nextToken(); // Consume "async function", "function" and "default function", so that these // get parsed as free-standing JS functions, i.e. do not require a trailing // semicolon. if (FormatTok->is(Keywords.kw_async)) nextToken(); if (FormatTok->is(Keywords.kw_function)) { nextToken(); return; } // For imports, `export *`, `export {...}`, consume the rest of the line up // to the terminating `;`. For everything else, just return and continue // parsing the structural element, i.e. the declaration or expression for // `export default`. if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && !FormatTok->isStringLiteral() && !(FormatTok->is(Keywords.kw_type) && Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { return; } while (!eof()) { if (FormatTok->is(tok::semi)) return; if (Line->Tokens.empty()) { // Common issue: Automatic Semicolon Insertion wrapped the line, so the // import statement should terminate. return; } if (FormatTok->is(tok::l_brace)) { FormatTok->setBlockKind(BK_Block); nextToken(); parseBracedList(); } else { nextToken(); } } } void UnwrappedLineParser::parseStatementMacro() { nextToken(); if (FormatTok->is(tok::l_paren)) parseParens(); if (FormatTok->is(tok::semi)) nextToken(); addUnwrappedLine(); } void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { // consume things like a::`b.c[d:e] or a::* while (true) { if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, tok::coloncolon, tok::hash) || Keywords.isVerilogIdentifier(*FormatTok)) { nextToken(); } else if (FormatTok->is(tok::l_square)) { parseSquare(); } else { break; } } } void UnwrappedLineParser::parseVerilogSensitivityList() { if (FormatTok->isNot(tok::at)) return; nextToken(); // A block event expression has 2 at signs. if (FormatTok->is(tok::at)) nextToken(); switch (FormatTok->Tok.getKind()) { case tok::star: nextToken(); break; case tok::l_paren: parseParens(); break; default: parseVerilogHierarchyIdentifier(); break; } } unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { unsigned AddLevels = 0; if (FormatTok->is(Keywords.kw_clocking)) { nextToken(); if (Keywords.isVerilogIdentifier(*FormatTok)) nextToken(); parseVerilogSensitivityList(); if (FormatTok->is(tok::semi)) nextToken(); } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, Keywords.kw_casez, Keywords.kw_randcase, Keywords.kw_randsequence)) { if (Style.IndentCaseLabels) AddLevels++; nextToken(); if (FormatTok->is(tok::l_paren)) { FormatTok->setFinalizedType(TT_ConditionLParen); parseParens(); } if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) nextToken(); // The case header has no semicolon. } else { // "module" etc. 
nextToken(); // all the words like the name of the module and specifiers like // "automatic" and the width of function return type while (true) { if (FormatTok->is(tok::l_square)) { auto Prev = FormatTok->getPreviousNonComment(); if (Prev && Keywords.isVerilogIdentifier(*Prev)) Prev->setFinalizedType(TT_VerilogDimensionedTypeName); parseSquare(); } else if (Keywords.isVerilogIdentifier(*FormatTok) || FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { nextToken(); } else { break; } } auto NewLine = [this]() { addUnwrappedLine(); Line->IsContinuation = true; }; // package imports while (FormatTok->is(Keywords.kw_import)) { NewLine(); nextToken(); parseVerilogHierarchyIdentifier(); if (FormatTok->is(tok::semi)) nextToken(); } // parameters and ports if (FormatTok->is(Keywords.kw_verilogHash)) { NewLine(); nextToken(); if (FormatTok->is(tok::l_paren)) { FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); parseParens(); } } if (FormatTok->is(tok::l_paren)) { NewLine(); FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); parseParens(); } // extends and implements if (FormatTok->is(Keywords.kw_extends)) { NewLine(); nextToken(); parseVerilogHierarchyIdentifier(); if (FormatTok->is(tok::l_paren)) parseParens(); } if (FormatTok->is(Keywords.kw_implements)) { NewLine(); do { nextToken(); parseVerilogHierarchyIdentifier(); } while (FormatTok->is(tok::comma)); } // Coverage event for cover groups. if (FormatTok->is(tok::at)) { NewLine(); parseVerilogSensitivityList(); } if (FormatTok->is(tok::semi)) nextToken(/*LevelDifference=*/1); addUnwrappedLine(); } return AddLevels; } void UnwrappedLineParser::parseVerilogTable() { assert(FormatTok->is(Keywords.kw_table)); nextToken(/*LevelDifference=*/1); addUnwrappedLine(); auto InitialLevel = Line->Level++; while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { FormatToken *Tok = FormatTok; nextToken(); if (Tok->is(tok::semi)) addUnwrappedLine(); else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) Tok->setFinalizedType(TT_VerilogTableItem); } Line->Level = InitialLevel; nextToken(/*LevelDifference=*/-1); addUnwrappedLine(); } void UnwrappedLineParser::parseVerilogCaseLabel() { // The label will get unindented in AnnotatingParser. If there are no leading // spaces, indent the rest here so that things inside the block will be // indented relative to things outside. We don't use parseLabel because we // don't know whether this colon is a label or a ternary expression at this // point. auto OrigLevel = Line->Level; auto FirstLine = CurrentLines->size(); if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) ++Line->Level; else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) --Line->Level; parseStructuralElement(); // Restore the indentation in both the new line and the line that has the // label. 
if (CurrentLines->size() > FirstLine) (*CurrentLines)[FirstLine].Level = OrigLevel; Line->Level = OrigLevel; } bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { for (const auto &N : Line.Tokens) { if (N.Tok->MacroCtx) return true; for (const UnwrappedLine &Child : N.Children) if (containsExpansion(Child)) return true; } return false; } void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { if (Line->Tokens.empty()) return; LLVM_DEBUG({ if (!parsingPPDirective()) { llvm::dbgs() << "Adding unwrapped line:\n"; printDebugInfo(*Line); } }); // If this line closes a block when in Whitesmiths mode, remember that // information so that the level can be decreased after the line is added. // This has to happen after the addition of the line since the line itself // needs to be indented. bool ClosesWhitesmithsBlock = Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; // If the current line was expanded from a macro call, we use it to // reconstruct an unwrapped line from the structure of the expanded unwrapped // line and the unexpanded token stream. if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { if (!Reconstruct) Reconstruct.emplace(Line->Level, Unexpanded); Reconstruct->addLine(*Line); // While the reconstructed unexpanded lines are stored in the normal // flow of lines, the expanded lines are stored on the side to be analyzed // in an extra step. CurrentExpandedLines.push_back(std::move(*Line)); if (Reconstruct->finished()) { UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); assert(!Reconstructed.Tokens.empty() && "Reconstructed must at least contain the macro identifier."); assert(!parsingPPDirective()); LLVM_DEBUG({ llvm::dbgs() << "Adding unexpanded line:\n"; printDebugInfo(Reconstructed); }); ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; Lines.push_back(std::move(Reconstructed)); CurrentExpandedLines.clear(); Reconstruct.reset(); } } else { // At the top level we only get here when no unexpansion is going on, or // when conditional formatting led to unfinished macro reconstructions. assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); CurrentLines->push_back(std::move(*Line)); } Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; Line->FirstStartColumn = 0; Line->IsContinuation = false; Line->SeenDecltypeAuto = false; if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) --Line->Level; if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), std::make_move_iterator(PreprocessorDirectives.end())); PreprocessorDirectives.clear(); } // Disconnect the current token from the last token on the previous line. FormatTok->Previous = nullptr; } bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && FormatTok.NewlinesBefore > 0; } // Checks if \p FormatTok is a line comment that continues the line comment // section on \p Line. 
static bool continuesLineCommentSection(const FormatToken &FormatTok, const UnwrappedLine &Line, const llvm::Regex &CommentPragmasRegex) { if (Line.Tokens.empty()) return false; StringRef IndentContent = FormatTok.TokenText; if (FormatTok.TokenText.starts_with("//") || FormatTok.TokenText.starts_with("/*")) { IndentContent = FormatTok.TokenText.substr(2); } if (CommentPragmasRegex.match(IndentContent)) return false; // If Line starts with a line comment, then FormatTok continues the comment // section if its original column is greater or equal to the original start // column of the line. // // Define the min column token of a line as follows: if a line ends in '{' or // contains a '{' followed by a line comment, then the min column token is // that '{'. Otherwise, the min column token of the line is the first token of // the line. // // If Line starts with a token other than a line comment, then FormatTok // continues the comment section if its original column is greater than the // original start column of the min column token of the line. // // For example, the second line comment continues the first in these cases: // // // first line // // second line // // and: // // // first line // // second line // // and: // // int i; // first line // // second line // // and: // // do { // first line // // second line // int i; // } while (true); // // and: // // enum { // a, // first line // // second line // b // }; // // The second line comment doesn't continue the first in these cases: // // // first line // // second line // // and: // // int i; // first line // // second line // // and: // // do { // first line // // second line // int i; // } while (true); // // and: // // enum { // a, // first line // // second line // }; const FormatToken *MinColumnToken = Line.Tokens.front().Tok; // Scan for '{//'. If found, use the column of '{' as a min column for line // comment section continuation. const FormatToken *PreviousToken = nullptr; for (const UnwrappedLineNode &Node : Line.Tokens) { if (PreviousToken && PreviousToken->is(tok::l_brace) && isLineComment(*Node.Tok)) { MinColumnToken = PreviousToken; break; } PreviousToken = Node.Tok; // Grab the last newline preceding a token in this unwrapped line. if (Node.Tok->NewlinesBefore > 0) MinColumnToken = Node.Tok; } if (PreviousToken && PreviousToken->is(tok::l_brace)) MinColumnToken = PreviousToken; return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, MinColumnToken); } void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { bool JustComments = Line->Tokens.empty(); for (FormatToken *Tok : CommentsBeforeNextToken) { // Line comments that belong to the same line comment section are put on the // same line since later we might want to reflow content between them. // Additional fine-grained breaking of line comment sections is controlled // by the class BreakableLineCommentSection in case it is desirable to keep // several line comment sections in the same unwrapped line. // // FIXME: Consider putting separate line comment sections as children to the // unwrapped line instead. 
Tok->ContinuesLineCommentSection = continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) addUnwrappedLine(); pushToken(Tok); } if (NewlineBeforeNext && JustComments) addUnwrappedLine(); CommentsBeforeNextToken.clear(); } void UnwrappedLineParser::nextToken(int LevelDifference) { if (eof()) return; flushComments(isOnNewLine(*FormatTok)); pushToken(FormatTok); FormatToken *Previous = FormatTok; if (!Style.isJavaScript()) readToken(LevelDifference); else readTokenWithJavaScriptASI(); FormatTok->Previous = Previous; if (Style.isVerilog()) { // Blocks in Verilog can have `begin` and `end` instead of braces. For // keywords like `begin`, we can't treat them the same as left braces // because some contexts require one of them. For example structs use // braces and if blocks use keywords, and a left brace can occur in an if // statement, but it is not a block. For keywords like `end`, we simply // treat them the same as right braces. if (Keywords.isVerilogEnd(*FormatTok)) FormatTok->Tok.setKind(tok::r_brace); } } void UnwrappedLineParser::distributeComments( const SmallVectorImpl &Comments, const FormatToken *NextTok) { // Whether or not a line comment token continues a line is controlled by // the method continuesLineCommentSection, with the following caveat: // // Define a trail of Comments to be a nonempty proper postfix of Comments such // that each comment line from the trail is aligned with the next token, if // the next token exists. If a trail exists, the beginning of the maximal // trail is marked as a start of a new comment section. // // For example in this code: // // int a; // line about a // // line 1 about b // // line 2 about b // int b; // // the two lines about b form a maximal trail, so there are two sections, the // first one consisting of the single comment "// line about a" and the // second one consisting of the next two comments. if (Comments.empty()) return; bool ShouldPushCommentsInCurrentLine = true; bool HasTrailAlignedWithNextToken = false; unsigned StartOfTrailAlignedWithNextToken = 0; if (NextTok) { // We are skipping the first element intentionally. 
for (unsigned i = Comments.size() - 1; i > 0; --i) { if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { HasTrailAlignedWithNextToken = true; StartOfTrailAlignedWithNextToken = i; } } } for (unsigned i = 0, e = Comments.size(); i < e; ++i) { FormatToken *FormatTok = Comments[i]; if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { FormatTok->ContinuesLineCommentSection = false; } else { FormatTok->ContinuesLineCommentSection = continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); } if (!FormatTok->ContinuesLineCommentSection && (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { ShouldPushCommentsInCurrentLine = false; } if (ShouldPushCommentsInCurrentLine) pushToken(FormatTok); else CommentsBeforeNextToken.push_back(FormatTok); } } void UnwrappedLineParser::readToken(int LevelDifference) { SmallVector Comments; bool PreviousWasComment = false; bool FirstNonCommentOnLine = false; do { FormatTok = Tokens->getNextToken(); assert(FormatTok); while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd, TT_ConflictAlternative)) { if (FormatTok->is(TT_ConflictStart)) conditionalCompilationStart(/*Unreachable=*/false); else if (FormatTok->is(TT_ConflictAlternative)) conditionalCompilationAlternative(); else if (FormatTok->is(TT_ConflictEnd)) conditionalCompilationEnd(); FormatTok = Tokens->getNextToken(); FormatTok->MustBreakBefore = true; FormatTok->MustBreakBeforeFinalized = true; } auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, const FormatToken &Tok, bool PreviousWasComment) { auto IsFirstOnLine = [](const FormatToken &Tok) { return Tok.HasUnescapedNewline || Tok.IsFirst; }; // Consider preprocessor directives preceded by block comments as first // on line. if (PreviousWasComment) return FirstNonCommentOnLine || IsFirstOnLine(Tok); return IsFirstOnLine(Tok); }; FirstNonCommentOnLine = IsFirstNonCommentOnLine( FirstNonCommentOnLine, *FormatTok, PreviousWasComment); PreviousWasComment = FormatTok->is(tok::comment); while (!Line->InPPDirective && FormatTok->is(tok::hash) && (!Style.isVerilog() || Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && FirstNonCommentOnLine) { distributeComments(Comments, FormatTok); Comments.clear(); // If there is an unfinished unwrapped line, we flush the preprocessor // directives only after that unwrapped line was finished later. bool SwitchToPreprocessorLines = !Line->Tokens.empty(); ScopedLineState BlockState(*this, SwitchToPreprocessorLines); assert((LevelDifference >= 0 || static_cast(-LevelDifference) <= Line->Level) && "LevelDifference makes Line->Level negative"); Line->Level += LevelDifference; // Comments stored before the preprocessor directive need to be output // before the preprocessor directive, at the same level as the // preprocessor directive, as we consider them to apply to the directive. if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && PPBranchLevel > 0) { Line->Level += PPBranchLevel; } assert(Line->Level >= Line->UnbracedBodyLevel); Line->Level -= Line->UnbracedBodyLevel; flushComments(isOnNewLine(*FormatTok)); parsePPDirective(); PreviousWasComment = FormatTok->is(tok::comment); FirstNonCommentOnLine = IsFirstNonCommentOnLine( FirstNonCommentOnLine, *FormatTok, PreviousWasComment); } if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && !Line->InPPDirective) { continue; } if (FormatTok->is(tok::identifier) && Macros.defined(FormatTok->TokenText) && // FIXME: Allow expanding macros in preprocessor directives. 
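// Editorial illustration, not part of the upstream change: the kind of
// configured macro the expansion handling here substitutes while parsing,
// assuming a style entry along the lines of
//   Macros: ['DECLARE_GETTER(T, name)=T name() const']
// (the macro name and signature are invented). A call such as
//
//   DECLARE_GETTER(int, size);
//
// is parsed against its expansion "int size() const;" and later rebuilt from
// the unexpanded tokens remembered in `Unexpanded` below.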
!Line->InPPDirective) { FormatToken *ID = FormatTok; unsigned Position = Tokens->getPosition(); // To correctly parse the code, we need to replace the tokens of the macro // call with its expansion. auto PreCall = std::move(Line); Line.reset(new UnwrappedLine); bool OldInExpansion = InExpansion; InExpansion = true; // We parse the macro call into a new line. auto Args = parseMacroCall(); InExpansion = OldInExpansion; assert(Line->Tokens.front().Tok == ID); // And remember the unexpanded macro call tokens. auto UnexpandedLine = std::move(Line); // Reset to the old line. Line = std::move(PreCall); LLVM_DEBUG({ llvm::dbgs() << "Macro call: " << ID->TokenText << "("; if (Args) { llvm::dbgs() << "("; for (const auto &Arg : Args.value()) for (const auto &T : Arg) llvm::dbgs() << T->TokenText << " "; llvm::dbgs() << ")"; } llvm::dbgs() << "\n"; }); if (Macros.objectLike(ID->TokenText) && Args && !Macros.hasArity(ID->TokenText, Args->size())) { // The macro is either // - object-like, but we got arguments, or // - overloaded to be both object-like and function-like, but none of // the function-like arities match the number of arguments. // Thus, expand as object-like macro. LLVM_DEBUG(llvm::dbgs() << "Macro \"" << ID->TokenText << "\" not overloaded for arity " << Args->size() << " or not function-like, using object-like overload."); Args.reset(); UnexpandedLine->Tokens.resize(1); Tokens->setPosition(Position); nextToken(); assert(!Args && Macros.objectLike(ID->TokenText)); } if ((!Args && Macros.objectLike(ID->TokenText)) || (Args && Macros.hasArity(ID->TokenText, Args->size()))) { // Next, we insert the expanded tokens in the token stream at the // current position, and continue parsing. Unexpanded[ID] = std::move(UnexpandedLine); SmallVector<FormatToken *, 8> Expansion = Macros.expand(ID, std::move(Args)); if (!Expansion.empty()) FormatTok = Tokens->insertTokens(Expansion); LLVM_DEBUG({ llvm::dbgs() << "Expanded: "; for (const auto &T : Expansion) llvm::dbgs() << T->TokenText << " "; llvm::dbgs() << "\n"; }); } else { LLVM_DEBUG({ llvm::dbgs() << "Did not expand macro \"" << ID->TokenText << "\", because it was used "; if (Args) llvm::dbgs() << "with " << Args->size(); else llvm::dbgs() << "without"; llvm::dbgs() << " arguments, which doesn't match any definition.\n"; }); Tokens->setPosition(Position); FormatTok = ID; } } if (FormatTok->isNot(tok::comment)) { distributeComments(Comments, FormatTok); Comments.clear(); return; } Comments.push_back(FormatTok); } while (!eof()); distributeComments(Comments, nullptr); Comments.clear(); } namespace { template <typename Iterator> void pushTokens(Iterator Begin, Iterator End, llvm::SmallVectorImpl<FormatToken *> &Into) { for (auto I = Begin; I != End; ++I) { Into.push_back(I->Tok); for (const auto &Child : I->Children) pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); } } } // namespace std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> UnwrappedLineParser::parseMacroCall() { std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; assert(Line->Tokens.empty()); nextToken(); if (FormatTok->isNot(tok::l_paren)) return Args; unsigned Position = Tokens->getPosition(); FormatToken *Tok = FormatTok; nextToken(); Args.emplace(); auto ArgStart = std::prev(Line->Tokens.end()); int Parens = 0; do { switch (FormatTok->Tok.getKind()) { case tok::l_paren: ++Parens; nextToken(); break; case tok::r_paren: { if (Parens > 0) { --Parens; nextToken(); break; } Args->push_back({}); pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); nextToken(); return Args; } case tok::comma: { if (Parens > 0) { nextToken(); break; } Args->push_back({});
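// A top-level comma (Parens == 0) ends the current macro argument: the
// tokens collected since ArgStart are flushed into Args below, and ArgStart
// is then reset so the next argument starts after the comma.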
pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); nextToken(); ArgStart = std::prev(Line->Tokens.end()); break; } default: nextToken(); break; } } while (!eof()); Line->Tokens.resize(1); Tokens->setPosition(Position); FormatTok = Tok; return {}; } void UnwrappedLineParser::pushToken(FormatToken *Tok) { Line->Tokens.push_back(UnwrappedLineNode(Tok)); if (MustBreakBeforeNextToken) { Line->Tokens.back().Tok->MustBreakBefore = true; Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; MustBreakBeforeNextToken = false; } } } // end namespace format } // end namespace clang diff --git a/contrib/llvm-project/clang/lib/Parse/ParseStmt.cpp b/contrib/llvm-project/clang/lib/Parse/ParseStmt.cpp index 22d38adc28eb..3ac1f0fa27f8 100644 --- a/contrib/llvm-project/clang/lib/Parse/ParseStmt.cpp +++ b/contrib/llvm-project/clang/lib/Parse/ParseStmt.cpp @@ -1,2817 +1,2820 @@ //===--- ParseStmt.cpp - Statement and Block Parser -----------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the Statement and Block portions of the Parser // interface. // //===----------------------------------------------------------------------===// #include "clang/AST/PrettyDeclStackTrace.h" #include "clang/Basic/Attributes.h" #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/TokenKinds.h" #include "clang/Parse/LoopHint.h" #include "clang/Parse/Parser.h" #include "clang/Parse/RAIIObjectsForParser.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/EnterExpressionEvaluationContext.h" #include "clang/Sema/Scope.h" #include "clang/Sema/SemaCodeCompletion.h" #include "clang/Sema/SemaObjC.h" #include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/TypoCorrection.h" #include "llvm/ADT/STLExtras.h" #include using namespace clang; //===----------------------------------------------------------------------===// // C99 6.8: Statements and Blocks. //===----------------------------------------------------------------------===// /// Parse a standalone statement (for instance, as the body of an 'if', /// 'while', or 'for'). StmtResult Parser::ParseStatement(SourceLocation *TrailingElseLoc, ParsedStmtContext StmtCtx) { StmtResult Res; // We may get back a null statement if we found a #pragma. Keep going until // we get an actual statement. StmtVector Stmts; do { Res = ParseStatementOrDeclaration(Stmts, StmtCtx, TrailingElseLoc); } while (!Res.isInvalid() && !Res.get()); return Res; } /// ParseStatementOrDeclaration - Read 'statement' or 'declaration'. 
/// StatementOrDeclaration: /// statement /// declaration /// /// statement: /// labeled-statement /// compound-statement /// expression-statement /// selection-statement /// iteration-statement /// jump-statement /// [C++] declaration-statement /// [C++] try-block /// [MS] seh-try-block /// [OBC] objc-throw-statement /// [OBC] objc-try-catch-statement /// [OBC] objc-synchronized-statement /// [GNU] asm-statement /// [OMP] openmp-construct [TODO] /// /// labeled-statement: /// identifier ':' statement /// 'case' constant-expression ':' statement /// 'default' ':' statement /// /// selection-statement: /// if-statement /// switch-statement /// /// iteration-statement: /// while-statement /// do-statement /// for-statement /// /// expression-statement: /// expression[opt] ';' /// /// jump-statement: /// 'goto' identifier ';' /// 'continue' ';' /// 'break' ';' /// 'return' expression[opt] ';' /// [GNU] 'goto' '*' expression ';' /// /// [OBC] objc-throw-statement: /// [OBC] '@' 'throw' expression ';' /// [OBC] '@' 'throw' ';' /// StmtResult Parser::ParseStatementOrDeclaration(StmtVector &Stmts, ParsedStmtContext StmtCtx, SourceLocation *TrailingElseLoc) { ParenBraceBracketBalancer BalancerRAIIObj(*this); // Because we're parsing either a statement or a declaration, the order of // attribute parsing is important. [[]] attributes at the start of a // statement are different from [[]] attributes that follow an __attribute__ // at the start of the statement. Thus, we're not using MaybeParseAttributes // here because we don't want to allow arbitrary orderings. ParsedAttributes CXX11Attrs(AttrFactory); MaybeParseCXX11Attributes(CXX11Attrs, /*MightBeObjCMessageSend*/ true); ParsedAttributes GNUOrMSAttrs(AttrFactory); if (getLangOpts().OpenCL) MaybeParseGNUAttributes(GNUOrMSAttrs); if (getLangOpts().HLSL) MaybeParseMicrosoftAttributes(GNUOrMSAttrs); StmtResult Res = ParseStatementOrDeclarationAfterAttributes( Stmts, StmtCtx, TrailingElseLoc, CXX11Attrs, GNUOrMSAttrs); MaybeDestroyTemplateIds(); // Attributes that are left should all go on the statement, so concatenate the // two lists. 
ParsedAttributes Attrs(AttrFactory); takeAndConcatenateAttrs(CXX11Attrs, GNUOrMSAttrs, Attrs); assert((Attrs.empty() || Res.isInvalid() || Res.isUsable()) && "attributes on empty statement"); if (Attrs.empty() || Res.isInvalid()) return Res; return Actions.ActOnAttributedStmt(Attrs, Res.get()); } namespace { class StatementFilterCCC final : public CorrectionCandidateCallback { public: StatementFilterCCC(Token nextTok) : NextToken(nextTok) { WantTypeSpecifiers = nextTok.isOneOf(tok::l_paren, tok::less, tok::l_square, tok::identifier, tok::star, tok::amp); WantExpressionKeywords = nextTok.isOneOf(tok::l_paren, tok::identifier, tok::arrow, tok::period); WantRemainingKeywords = nextTok.isOneOf(tok::l_paren, tok::semi, tok::identifier, tok::l_brace); WantCXXNamedCasts = false; } bool ValidateCandidate(const TypoCorrection &candidate) override { if (FieldDecl *FD = candidate.getCorrectionDeclAs()) return !candidate.getCorrectionSpecifier() || isa(FD); if (NextToken.is(tok::equal)) return candidate.getCorrectionDeclAs(); if (NextToken.is(tok::period) && candidate.getCorrectionDeclAs()) return false; return CorrectionCandidateCallback::ValidateCandidate(candidate); } std::unique_ptr clone() override { return std::make_unique(*this); } private: Token NextToken; }; } StmtResult Parser::ParseStatementOrDeclarationAfterAttributes( StmtVector &Stmts, ParsedStmtContext StmtCtx, SourceLocation *TrailingElseLoc, ParsedAttributes &CXX11Attrs, ParsedAttributes &GNUAttrs) { const char *SemiError = nullptr; StmtResult Res; SourceLocation GNUAttributeLoc; // Cases in this switch statement should fall through if the parser expects // the token to end in a semicolon (in which case SemiError should be set), // or they directly 'return;' if not. Retry: tok::TokenKind Kind = Tok.getKind(); SourceLocation AtLoc; switch (Kind) { case tok::at: // May be a @try or @throw statement { AtLoc = ConsumeToken(); // consume @ return ParseObjCAtStatement(AtLoc, StmtCtx); } case tok::code_completion: cutOffParsing(); Actions.CodeCompletion().CodeCompleteOrdinaryName( getCurScope(), SemaCodeCompletion::PCC_Statement); return StmtError(); case tok::identifier: ParseIdentifier: { Token Next = NextToken(); if (Next.is(tok::colon)) { // C99 6.8.1: labeled-statement // Both C++11 and GNU attributes preceding the label appertain to the // label, so put them in a single list to pass on to // ParseLabeledStatement(). ParsedAttributes Attrs(AttrFactory); takeAndConcatenateAttrs(CXX11Attrs, GNUAttrs, Attrs); // identifier ':' statement return ParseLabeledStatement(Attrs, StmtCtx); } // Look up the identifier, and typo-correct it to a keyword if it's not // found. if (Next.isNot(tok::coloncolon)) { // Try to limit which sets of keywords should be included in typo // correction based on what the next token is. StatementFilterCCC CCC(Next); if (TryAnnotateName(&CCC) == ANK_Error) { // Handle errors here by skipping up to the next semicolon or '}', and // eat the semicolon if that's what stopped us. SkipUntil(tok::r_brace, StopAtSemi | StopBeforeMatch); if (Tok.is(tok::semi)) ConsumeToken(); return StmtError(); } // If the identifier was typo-corrected, try again. 
if (Tok.isNot(tok::identifier)) goto Retry; } // Fall through [[fallthrough]]; } default: { bool HaveAttrs = !CXX11Attrs.empty() || !GNUAttrs.empty(); auto IsStmtAttr = [](ParsedAttr &Attr) { return Attr.isStmtAttr(); }; bool AllAttrsAreStmtAttrs = llvm::all_of(CXX11Attrs, IsStmtAttr) && llvm::all_of(GNUAttrs, IsStmtAttr); // In C, the grammar production for statement (C23 6.8.1p1) does not allow // for declarations, which is different from C++ (C++23 [stmt.pre]p1). So // in C++, we always allow a declaration, but in C we need to check whether // we're in a statement context that allows declarations. e.g., in C, the // following is invalid: if (1) int x; if ((getLangOpts().CPlusPlus || getLangOpts().MicrosoftExt || (StmtCtx & ParsedStmtContext::AllowDeclarationsInC) != ParsedStmtContext()) && ((GNUAttributeLoc.isValid() && !(HaveAttrs && AllAttrsAreStmtAttrs)) || isDeclarationStatement())) { SourceLocation DeclStart = Tok.getLocation(), DeclEnd; DeclGroupPtrTy Decl; if (GNUAttributeLoc.isValid()) { DeclStart = GNUAttributeLoc; Decl = ParseDeclaration(DeclaratorContext::Block, DeclEnd, CXX11Attrs, GNUAttrs, &GNUAttributeLoc); } else { Decl = ParseDeclaration(DeclaratorContext::Block, DeclEnd, CXX11Attrs, GNUAttrs); } if (CXX11Attrs.Range.getBegin().isValid()) { // The caller must guarantee that the CXX11Attrs appear before the // GNUAttrs, and we rely on that here. assert(GNUAttrs.Range.getBegin().isInvalid() || GNUAttrs.Range.getBegin() > CXX11Attrs.Range.getBegin()); DeclStart = CXX11Attrs.Range.getBegin(); } else if (GNUAttrs.Range.getBegin().isValid()) DeclStart = GNUAttrs.Range.getBegin(); return Actions.ActOnDeclStmt(Decl, DeclStart, DeclEnd); } if (Tok.is(tok::r_brace)) { Diag(Tok, diag::err_expected_statement); return StmtError(); } switch (Tok.getKind()) { #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait: #include "clang/Basic/TransformTypeTraits.def" if (NextToken().is(tok::less)) { Tok.setKind(tok::identifier); Diag(Tok, diag::ext_keyword_as_ident) << Tok.getIdentifierInfo()->getName() << 0; goto ParseIdentifier; } [[fallthrough]]; default: return ParseExprStatement(StmtCtx); } } case tok::kw___attribute: { GNUAttributeLoc = Tok.getLocation(); ParseGNUAttributes(GNUAttrs); goto Retry; } case tok::kw_case: // C99 6.8.1: labeled-statement return ParseCaseStatement(StmtCtx); case tok::kw_default: // C99 6.8.1: labeled-statement return ParseDefaultStatement(StmtCtx); case tok::l_brace: // C99 6.8.2: compound-statement return ParseCompoundStatement(); case tok::semi: { // C99 6.8.3p3: expression[opt] ';' bool HasLeadingEmptyMacro = Tok.hasLeadingEmptyMacro(); return Actions.ActOnNullStmt(ConsumeToken(), HasLeadingEmptyMacro); } case tok::kw_if: // C99 6.8.4.1: if-statement return ParseIfStatement(TrailingElseLoc); case tok::kw_switch: // C99 6.8.4.2: switch-statement return ParseSwitchStatement(TrailingElseLoc); case tok::kw_while: // C99 6.8.5.1: while-statement return ParseWhileStatement(TrailingElseLoc); case tok::kw_do: // C99 6.8.5.2: do-statement Res = ParseDoStatement(); SemiError = "do/while"; break; case tok::kw_for: // C99 6.8.5.3: for-statement return ParseForStatement(TrailingElseLoc); case tok::kw_goto: // C99 6.8.6.1: goto-statement Res = ParseGotoStatement(); SemiError = "goto"; break; case tok::kw_continue: // C99 6.8.6.2: continue-statement Res = ParseContinueStatement(); SemiError = "continue"; break; case tok::kw_break: // C99 6.8.6.3: break-statement Res = ParseBreakStatement(); SemiError = "break"; break; case tok::kw_return: // C99 6.8.6.4: 
return-statement Res = ParseReturnStatement(); SemiError = "return"; break; case tok::kw_co_return: // C++ Coroutines: co_return statement Res = ParseReturnStatement(); SemiError = "co_return"; break; case tok::kw_asm: { for (const ParsedAttr &AL : CXX11Attrs) // Could be relaxed if asm-related regular keyword attributes are // added later. (AL.isRegularKeywordAttribute() ? Diag(AL.getRange().getBegin(), diag::err_keyword_not_allowed) : Diag(AL.getRange().getBegin(), diag::warn_attribute_ignored)) << AL; // Prevent these from being interpreted as statement attributes later on. CXX11Attrs.clear(); ProhibitAttributes(GNUAttrs); bool msAsm = false; Res = ParseAsmStatement(msAsm); if (msAsm) return Res; SemiError = "asm"; break; } case tok::kw___if_exists: case tok::kw___if_not_exists: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); ParseMicrosoftIfExistsStatement(Stmts); // An __if_exists block is like a compound statement, but it doesn't create // a new scope. return StmtEmpty(); case tok::kw_try: // C++ 15: try-block return ParseCXXTryBlock(); case tok::kw___try: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); return ParseSEHTryBlock(); case tok::kw___leave: Res = ParseSEHLeaveStatement(); SemiError = "__leave"; break; case tok::annot_pragma_vis: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); HandlePragmaVisibility(); return StmtEmpty(); case tok::annot_pragma_pack: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); HandlePragmaPack(); return StmtEmpty(); case tok::annot_pragma_msstruct: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); HandlePragmaMSStruct(); return StmtEmpty(); case tok::annot_pragma_align: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); HandlePragmaAlign(); return StmtEmpty(); case tok::annot_pragma_weak: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); HandlePragmaWeak(); return StmtEmpty(); case tok::annot_pragma_weakalias: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); HandlePragmaWeakAlias(); return StmtEmpty(); case tok::annot_pragma_redefine_extname: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); HandlePragmaRedefineExtname(); return StmtEmpty(); case tok::annot_pragma_fp_contract: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); Diag(Tok, diag::err_pragma_file_or_compound_scope) << "fp_contract"; ConsumeAnnotationToken(); return StmtError(); case tok::annot_pragma_fp: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); Diag(Tok, diag::err_pragma_file_or_compound_scope) << "clang fp"; ConsumeAnnotationToken(); return StmtError(); case tok::annot_pragma_fenv_access: case tok::annot_pragma_fenv_access_ms: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); Diag(Tok, diag::err_pragma_file_or_compound_scope) << (Kind == tok::annot_pragma_fenv_access ? 
"STDC FENV_ACCESS" : "fenv_access"); ConsumeAnnotationToken(); return StmtEmpty(); case tok::annot_pragma_fenv_round: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); Diag(Tok, diag::err_pragma_file_or_compound_scope) << "STDC FENV_ROUND"; ConsumeAnnotationToken(); return StmtError(); case tok::annot_pragma_cx_limited_range: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); Diag(Tok, diag::err_pragma_file_or_compound_scope) << "STDC CX_LIMITED_RANGE"; ConsumeAnnotationToken(); return StmtError(); case tok::annot_pragma_float_control: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); Diag(Tok, diag::err_pragma_file_or_compound_scope) << "float_control"; ConsumeAnnotationToken(); return StmtError(); case tok::annot_pragma_opencl_extension: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); HandlePragmaOpenCLExtension(); return StmtEmpty(); case tok::annot_pragma_captured: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); return HandlePragmaCaptured(); case tok::annot_pragma_openmp: // Prohibit attributes that are not OpenMP attributes, but only before // processing a #pragma omp clause. ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); [[fallthrough]]; case tok::annot_attr_openmp: // Do not prohibit attributes if they were OpenMP attributes. return ParseOpenMPDeclarativeOrExecutableDirective(StmtCtx); case tok::annot_pragma_openacc: return ParseOpenACCDirectiveStmt(); case tok::annot_pragma_ms_pointers_to_members: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); HandlePragmaMSPointersToMembers(); return StmtEmpty(); case tok::annot_pragma_ms_pragma: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); HandlePragmaMSPragma(); return StmtEmpty(); case tok::annot_pragma_ms_vtordisp: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); HandlePragmaMSVtorDisp(); return StmtEmpty(); case tok::annot_pragma_loop_hint: ProhibitAttributes(CXX11Attrs); ProhibitAttributes(GNUAttrs); return ParsePragmaLoopHint(Stmts, StmtCtx, TrailingElseLoc, CXX11Attrs); case tok::annot_pragma_dump: HandlePragmaDump(); return StmtEmpty(); case tok::annot_pragma_attribute: HandlePragmaAttribute(); return StmtEmpty(); } // If we reached this code, the statement must end in a semicolon. if (!TryConsumeToken(tok::semi) && !Res.isInvalid()) { // If the result was valid, then we do want to diagnose this. Use // ExpectAndConsume to emit the diagnostic, even though we know it won't // succeed. ExpectAndConsume(tok::semi, diag::err_expected_semi_after_stmt, SemiError); // Skip until we see a } or ;, but don't eat it. SkipUntil(tok::r_brace, StopAtSemi | StopBeforeMatch); } return Res; } /// Parse an expression statement. StmtResult Parser::ParseExprStatement(ParsedStmtContext StmtCtx) { // If a case keyword is missing, this is where it should be inserted. Token OldToken = Tok; ExprStatementTokLoc = Tok.getLocation(); // expression[opt] ';' ExprResult Expr(ParseExpression()); if (Expr.isInvalid()) { // If the expression is invalid, skip ahead to the next semicolon or '}'. // Not doing this opens us up to the possibility of infinite loops if // ParseExpression does not consume any tokens. SkipUntil(tok::r_brace, StopAtSemi | StopBeforeMatch); if (Tok.is(tok::semi)) ConsumeToken(); return Actions.ActOnExprStmtError(); } if (Tok.is(tok::colon) && getCurScope()->isSwitchScope() && Actions.CheckCaseExpression(Expr.get())) { // If a constant expression is followed by a colon inside a switch block, // suggest a missing case keyword. 
Diag(OldToken, diag::err_expected_case_before_expression) << FixItHint::CreateInsertion(OldToken.getLocation(), "case "); // Recover parsing as a case statement. return ParseCaseStatement(StmtCtx, /*MissingCase=*/true, Expr); } Token *CurTok = nullptr; // Note we shouldn't eat the token since the callback needs it. if (Tok.is(tok::annot_repl_input_end)) CurTok = &Tok; else // Otherwise, eat the semicolon. ExpectAndConsumeSemi(diag::err_expected_semi_after_expr); StmtResult R = handleExprStmt(Expr, StmtCtx); if (CurTok && !R.isInvalid()) CurTok->setAnnotationValue(R.get()); return R; } /// ParseSEHTryBlockCommon /// /// seh-try-block: /// '__try' compound-statement seh-handler /// /// seh-handler: /// seh-except-block /// seh-finally-block /// StmtResult Parser::ParseSEHTryBlock() { assert(Tok.is(tok::kw___try) && "Expected '__try'"); SourceLocation TryLoc = ConsumeToken(); if (Tok.isNot(tok::l_brace)) return StmtError(Diag(Tok, diag::err_expected) << tok::l_brace); StmtResult TryBlock(ParseCompoundStatement( /*isStmtExpr=*/false, Scope::DeclScope | Scope::CompoundStmtScope | Scope::SEHTryScope)); if (TryBlock.isInvalid()) return TryBlock; StmtResult Handler; if (Tok.is(tok::identifier) && Tok.getIdentifierInfo() == getSEHExceptKeyword()) { SourceLocation Loc = ConsumeToken(); Handler = ParseSEHExceptBlock(Loc); } else if (Tok.is(tok::kw___finally)) { SourceLocation Loc = ConsumeToken(); Handler = ParseSEHFinallyBlock(Loc); } else { return StmtError(Diag(Tok, diag::err_seh_expected_handler)); } if(Handler.isInvalid()) return Handler; return Actions.ActOnSEHTryBlock(false /* IsCXXTry */, TryLoc, TryBlock.get(), Handler.get()); } /// ParseSEHExceptBlock - Handle __except /// /// seh-except-block: /// '__except' '(' seh-filter-expression ')' compound-statement /// StmtResult Parser::ParseSEHExceptBlock(SourceLocation ExceptLoc) { PoisonIdentifierRAIIObject raii(Ident__exception_code, false), raii2(Ident___exception_code, false), raii3(Ident_GetExceptionCode, false); if (ExpectAndConsume(tok::l_paren)) return StmtError(); ParseScope ExpectScope(this, Scope::DeclScope | Scope::ControlScope | Scope::SEHExceptScope); if (getLangOpts().Borland) { Ident__exception_info->setIsPoisoned(false); Ident___exception_info->setIsPoisoned(false); Ident_GetExceptionInfo->setIsPoisoned(false); } ExprResult FilterExpr; { ParseScopeFlags FilterScope(this, getCurScope()->getFlags() | Scope::SEHFilterScope); FilterExpr = Actions.CorrectDelayedTyposInExpr(ParseExpression()); } if (getLangOpts().Borland) { Ident__exception_info->setIsPoisoned(true); Ident___exception_info->setIsPoisoned(true); Ident_GetExceptionInfo->setIsPoisoned(true); } if(FilterExpr.isInvalid()) return StmtError(); if (ExpectAndConsume(tok::r_paren)) return StmtError(); if (Tok.isNot(tok::l_brace)) return StmtError(Diag(Tok, diag::err_expected) << tok::l_brace); StmtResult Block(ParseCompoundStatement()); if(Block.isInvalid()) return Block; return Actions.ActOnSEHExceptBlock(ExceptLoc, FilterExpr.get(), Block.get()); } /// ParseSEHFinallyBlock - Handle __finally /// /// seh-finally-block: /// '__finally' compound-statement /// StmtResult Parser::ParseSEHFinallyBlock(SourceLocation FinallyLoc) { PoisonIdentifierRAIIObject raii(Ident__abnormal_termination, false), raii2(Ident___abnormal_termination, false), raii3(Ident_AbnormalTermination, false); if (Tok.isNot(tok::l_brace)) return StmtError(Diag(Tok, diag::err_expected) << tok::l_brace); ParseScope FinallyScope(this, 0); Actions.ActOnStartSEHFinallyBlock(); StmtResult 
Block(ParseCompoundStatement()); if(Block.isInvalid()) { Actions.ActOnAbortSEHFinallyBlock(); return Block; } return Actions.ActOnFinishSEHFinallyBlock(FinallyLoc, Block.get()); } /// Handle __leave /// /// seh-leave-statement: /// '__leave' ';' /// StmtResult Parser::ParseSEHLeaveStatement() { SourceLocation LeaveLoc = ConsumeToken(); // eat the '__leave'. return Actions.ActOnSEHLeaveStmt(LeaveLoc, getCurScope()); } static void DiagnoseLabelFollowedByDecl(Parser &P, const Stmt *SubStmt) { // When in C mode (but not Microsoft extensions mode), diagnose use of a // label that is followed by a declaration rather than a statement. if (!P.getLangOpts().CPlusPlus && !P.getLangOpts().MicrosoftExt && isa(SubStmt)) { P.Diag(SubStmt->getBeginLoc(), P.getLangOpts().C23 ? diag::warn_c23_compat_label_followed_by_declaration : diag::ext_c_label_followed_by_declaration); } } /// ParseLabeledStatement - We have an identifier and a ':' after it. /// /// label: /// identifier ':' /// [GNU] identifier ':' attributes[opt] /// /// labeled-statement: /// label statement /// StmtResult Parser::ParseLabeledStatement(ParsedAttributes &Attrs, ParsedStmtContext StmtCtx) { assert(Tok.is(tok::identifier) && Tok.getIdentifierInfo() && "Not an identifier!"); // [OpenMP 5.1] 2.1.3: A stand-alone directive may not be used in place of a // substatement in a selection statement, in place of the loop body in an // iteration statement, or in place of the statement that follows a label. StmtCtx &= ~ParsedStmtContext::AllowStandaloneOpenMPDirectives; Token IdentTok = Tok; // Save the whole token. ConsumeToken(); // eat the identifier. assert(Tok.is(tok::colon) && "Not a label!"); // identifier ':' statement SourceLocation ColonLoc = ConsumeToken(); // Read label attributes, if present. StmtResult SubStmt; if (Tok.is(tok::kw___attribute)) { ParsedAttributes TempAttrs(AttrFactory); ParseGNUAttributes(TempAttrs); // In C++, GNU attributes only apply to the label if they are followed by a // semicolon, to disambiguate label attributes from attributes on a labeled // declaration. // // This doesn't quite match what GCC does; if the attribute list is empty // and followed by a semicolon, GCC will reject (it appears to parse the // attributes as part of a statement in that case). That looks like a bug. if (!getLangOpts().CPlusPlus || Tok.is(tok::semi)) Attrs.takeAllFrom(TempAttrs); else { StmtVector Stmts; ParsedAttributes EmptyCXX11Attrs(AttrFactory); SubStmt = ParseStatementOrDeclarationAfterAttributes( Stmts, StmtCtx, nullptr, EmptyCXX11Attrs, TempAttrs); if (!TempAttrs.empty() && !SubStmt.isInvalid()) SubStmt = Actions.ActOnAttributedStmt(TempAttrs, SubStmt.get()); } } // The label may have no statement following it if (SubStmt.isUnset() && Tok.is(tok::r_brace)) { DiagnoseLabelAtEndOfCompoundStatement(); SubStmt = Actions.ActOnNullStmt(ColonLoc); } // If we've not parsed a statement yet, parse one now. if (!SubStmt.isInvalid() && !SubStmt.isUsable()) SubStmt = ParseStatement(nullptr, StmtCtx); // Broken substmt shouldn't prevent the label from being added to the AST. 
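// Illustration only (hypothetical C++ input): as handled by the TempAttrs
// logic above, a GNU attribute after a label binds to the label only when a
// ';' follows; otherwise it is parsed with the labeled declaration or
// statement that comes next.
#if 0
void f() {
cleanup:
  __attribute__((unused));             // appertains to the label 'cleanup'
done:
  __attribute__((unused)) int tmp = 0; // appertains to the declaration of 'tmp'
}
#endif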
if (SubStmt.isInvalid()) SubStmt = Actions.ActOnNullStmt(ColonLoc); DiagnoseLabelFollowedByDecl(*this, SubStmt.get()); LabelDecl *LD = Actions.LookupOrCreateLabel(IdentTok.getIdentifierInfo(), IdentTok.getLocation()); Actions.ProcessDeclAttributeList(Actions.CurScope, LD, Attrs); Attrs.clear(); return Actions.ActOnLabelStmt(IdentTok.getLocation(), LD, ColonLoc, SubStmt.get()); } /// ParseCaseStatement /// labeled-statement: /// 'case' constant-expression ':' statement /// [GNU] 'case' constant-expression '...' constant-expression ':' statement /// StmtResult Parser::ParseCaseStatement(ParsedStmtContext StmtCtx, bool MissingCase, ExprResult Expr) { assert((MissingCase || Tok.is(tok::kw_case)) && "Not a case stmt!"); // [OpenMP 5.1] 2.1.3: A stand-alone directive may not be used in place of a // substatement in a selection statement, in place of the loop body in an // iteration statement, or in place of the statement that follows a label. StmtCtx &= ~ParsedStmtContext::AllowStandaloneOpenMPDirectives; // It is very common for code to contain many case statements recursively // nested, as in (but usually without indentation): // case 1: // case 2: // case 3: // case 4: // case 5: etc. // // Parsing this naively works, but is both inefficient and can cause us to run // out of stack space in our recursive descent parser. As a special case, // flatten this recursion into an iterative loop. This is complex and gross, // but all the grossness is constrained to ParseCaseStatement (and some // weirdness in the actions), so this is just local grossness :). // TopLevelCase - This is the highest level we have parsed. 'case 1' in the // example above. StmtResult TopLevelCase(true); // DeepestParsedCaseStmt - This is the deepest statement we have parsed, which // gets updated each time a new case is parsed, and whose body is unset so // far. When parsing 'case 4', this is the 'case 3' node. Stmt *DeepestParsedCaseStmt = nullptr; // While we have case statements, eat and stack them. SourceLocation ColonLoc; do { SourceLocation CaseLoc = MissingCase ? Expr.get()->getExprLoc() : ConsumeToken(); // eat the 'case'. ColonLoc = SourceLocation(); if (Tok.is(tok::code_completion)) { cutOffParsing(); Actions.CodeCompletion().CodeCompleteCase(getCurScope()); return StmtError(); } /// We don't want to treat 'case x : y' as a potential typo for 'case x::y'. /// Disable this form of error recovery while we're parsing the case /// expression. ColonProtectionRAIIObject ColonProtection(*this); ExprResult LHS; if (!MissingCase) { LHS = ParseCaseExpression(CaseLoc); if (LHS.isInvalid()) { // If constant-expression is parsed unsuccessfully, recover by skipping // current case statement (moving to the colon that ends it). if (!SkipUntil(tok::colon, tok::r_brace, StopAtSemi | StopBeforeMatch)) return StmtError(); } } else { LHS = Expr; MissingCase = false; } // GNU case range extension. SourceLocation DotDotDotLoc; ExprResult RHS; if (TryConsumeToken(tok::ellipsis, DotDotDotLoc)) { Diag(DotDotDotLoc, diag::ext_gnu_case_range); RHS = ParseCaseExpression(CaseLoc); if (RHS.isInvalid()) { if (!SkipUntil(tok::colon, tok::r_brace, StopAtSemi | StopBeforeMatch)) return StmtError(); } } ColonProtection.restore(); if (TryConsumeToken(tok::colon, ColonLoc)) { } else if (TryConsumeToken(tok::semi, ColonLoc) || TryConsumeToken(tok::coloncolon, ColonLoc)) { // Treat "case blah;" or "case blah::" as a typo for "case blah:". 
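// Illustration (hypothetical input): "case kDone;" or "case kDone::" reaches
// the diagnostic below, which replaces the stray token with ':' via a fix-it.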
Diag(ColonLoc, diag::err_expected_after) << "'case'" << tok::colon << FixItHint::CreateReplacement(ColonLoc, ":"); } else { SourceLocation ExpectedLoc = PP.getLocForEndOfToken(PrevTokLocation); Diag(ExpectedLoc, diag::err_expected_after) << "'case'" << tok::colon << FixItHint::CreateInsertion(ExpectedLoc, ":"); ColonLoc = ExpectedLoc; } StmtResult Case = Actions.ActOnCaseStmt(CaseLoc, LHS, DotDotDotLoc, RHS, ColonLoc); // If we had a sema error parsing this case, then just ignore it and // continue parsing the sub-stmt. if (Case.isInvalid()) { if (TopLevelCase.isInvalid()) // No parsed case stmts. return ParseStatement(/*TrailingElseLoc=*/nullptr, StmtCtx); // Otherwise, just don't add it as a nested case. } else { // If this is the first case statement we parsed, it becomes TopLevelCase. // Otherwise we link it into the current chain. Stmt *NextDeepest = Case.get(); if (TopLevelCase.isInvalid()) TopLevelCase = Case; else Actions.ActOnCaseStmtBody(DeepestParsedCaseStmt, Case.get()); DeepestParsedCaseStmt = NextDeepest; } // Handle all case statements. } while (Tok.is(tok::kw_case)); // If we found a non-case statement, start by parsing it. StmtResult SubStmt; if (Tok.is(tok::r_brace)) { // "switch (X) { case 4: }", is valid and is treated as if label was // followed by a null statement. DiagnoseLabelAtEndOfCompoundStatement(); SubStmt = Actions.ActOnNullStmt(ColonLoc); } else { SubStmt = ParseStatement(/*TrailingElseLoc=*/nullptr, StmtCtx); } // Install the body into the most deeply-nested case. if (DeepestParsedCaseStmt) { // Broken sub-stmt shouldn't prevent forming the case statement properly. if (SubStmt.isInvalid()) SubStmt = Actions.ActOnNullStmt(SourceLocation()); DiagnoseLabelFollowedByDecl(*this, SubStmt.get()); Actions.ActOnCaseStmtBody(DeepestParsedCaseStmt, SubStmt.get()); } // Return the top level parsed statement tree. return TopLevelCase; } /// ParseDefaultStatement /// labeled-statement: /// 'default' ':' statement /// Note that this does not parse the 'statement' at the end. /// StmtResult Parser::ParseDefaultStatement(ParsedStmtContext StmtCtx) { assert(Tok.is(tok::kw_default) && "Not a default stmt!"); // [OpenMP 5.1] 2.1.3: A stand-alone directive may not be used in place of a // substatement in a selection statement, in place of the loop body in an // iteration statement, or in place of the statement that follows a label. StmtCtx &= ~ParsedStmtContext::AllowStandaloneOpenMPDirectives; SourceLocation DefaultLoc = ConsumeToken(); // eat the 'default'. SourceLocation ColonLoc; if (TryConsumeToken(tok::colon, ColonLoc)) { } else if (TryConsumeToken(tok::semi, ColonLoc)) { // Treat "default;" as a typo for "default:". Diag(ColonLoc, diag::err_expected_after) << "'default'" << tok::colon << FixItHint::CreateReplacement(ColonLoc, ":"); } else { SourceLocation ExpectedLoc = PP.getLocForEndOfToken(PrevTokLocation); Diag(ExpectedLoc, diag::err_expected_after) << "'default'" << tok::colon << FixItHint::CreateInsertion(ExpectedLoc, ":"); ColonLoc = ExpectedLoc; } StmtResult SubStmt; if (Tok.is(tok::r_brace)) { // "switch (X) {... default: }", is valid and is treated as if label was // followed by a null statement. DiagnoseLabelAtEndOfCompoundStatement(); SubStmt = Actions.ActOnNullStmt(ColonLoc); } else { SubStmt = ParseStatement(/*TrailingElseLoc=*/nullptr, StmtCtx); } // Broken sub-stmt shouldn't prevent forming the case statement properly. 
if (SubStmt.isInvalid()) SubStmt = Actions.ActOnNullStmt(ColonLoc); DiagnoseLabelFollowedByDecl(*this, SubStmt.get()); return Actions.ActOnDefaultStmt(DefaultLoc, ColonLoc, SubStmt.get(), getCurScope()); } StmtResult Parser::ParseCompoundStatement(bool isStmtExpr) { return ParseCompoundStatement(isStmtExpr, Scope::DeclScope | Scope::CompoundStmtScope); } /// ParseCompoundStatement - Parse a "{}" block. /// /// compound-statement: [C99 6.8.2] /// { block-item-list[opt] } /// [GNU] { label-declarations block-item-list } [TODO] /// /// block-item-list: /// block-item /// block-item-list block-item /// /// block-item: /// declaration /// [GNU] '__extension__' declaration /// statement /// /// [GNU] label-declarations: /// [GNU] label-declaration /// [GNU] label-declarations label-declaration /// /// [GNU] label-declaration: /// [GNU] '__label__' identifier-list ';' /// StmtResult Parser::ParseCompoundStatement(bool isStmtExpr, unsigned ScopeFlags) { assert(Tok.is(tok::l_brace) && "Not a compound stmt!"); // Enter a scope to hold everything within the compound stmt. Compound // statements can always hold declarations. ParseScope CompoundScope(this, ScopeFlags); // Parse the statements in the body. return ParseCompoundStatementBody(isStmtExpr); } /// Parse any pragmas at the start of the compound expression. We handle these /// separately since some pragmas (FP_CONTRACT) must appear before any C /// statement in the compound, but may be intermingled with other pragmas. void Parser::ParseCompoundStatementLeadingPragmas() { bool checkForPragmas = true; while (checkForPragmas) { switch (Tok.getKind()) { case tok::annot_pragma_vis: HandlePragmaVisibility(); break; case tok::annot_pragma_pack: HandlePragmaPack(); break; case tok::annot_pragma_msstruct: HandlePragmaMSStruct(); break; case tok::annot_pragma_align: HandlePragmaAlign(); break; case tok::annot_pragma_weak: HandlePragmaWeak(); break; case tok::annot_pragma_weakalias: HandlePragmaWeakAlias(); break; case tok::annot_pragma_redefine_extname: HandlePragmaRedefineExtname(); break; case tok::annot_pragma_opencl_extension: HandlePragmaOpenCLExtension(); break; case tok::annot_pragma_fp_contract: HandlePragmaFPContract(); break; case tok::annot_pragma_fp: HandlePragmaFP(); break; case tok::annot_pragma_fenv_access: case tok::annot_pragma_fenv_access_ms: HandlePragmaFEnvAccess(); break; case tok::annot_pragma_fenv_round: HandlePragmaFEnvRound(); break; case tok::annot_pragma_cx_limited_range: HandlePragmaCXLimitedRange(); break; case tok::annot_pragma_float_control: HandlePragmaFloatControl(); break; case tok::annot_pragma_ms_pointers_to_members: HandlePragmaMSPointersToMembers(); break; case tok::annot_pragma_ms_pragma: HandlePragmaMSPragma(); break; case tok::annot_pragma_ms_vtordisp: HandlePragmaMSVtorDisp(); break; case tok::annot_pragma_dump: HandlePragmaDump(); break; default: checkForPragmas = false; break; } } } void Parser::DiagnoseLabelAtEndOfCompoundStatement() { if (getLangOpts().CPlusPlus) { Diag(Tok, getLangOpts().CPlusPlus23 ? diag::warn_cxx20_compat_label_end_of_compound_statement : diag::ext_cxx_label_end_of_compound_statement); } else { Diag(Tok, getLangOpts().C23 ? diag::warn_c23_compat_label_end_of_compound_statement : diag::ext_c_label_end_of_compound_statement); } } /// Consume any extra semi-colons resulting in null statements, /// returning true if any tok::semi were consumed. 
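///
/// For illustration (hypothetical input): after "do_work();", a stray ";;"
/// is consumed here as null statements that are kept in the AST, and
/// warn_null_statement is emitted with a fix-it removing the redundant range.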
bool Parser::ConsumeNullStmt(StmtVector &Stmts) { if (!Tok.is(tok::semi)) return false; SourceLocation StartLoc = Tok.getLocation(); SourceLocation EndLoc; while (Tok.is(tok::semi) && !Tok.hasLeadingEmptyMacro() && Tok.getLocation().isValid() && !Tok.getLocation().isMacroID()) { EndLoc = Tok.getLocation(); // Don't just ConsumeToken() this tok::semi, do store it in AST. StmtResult R = ParseStatementOrDeclaration(Stmts, ParsedStmtContext::SubStmt); if (R.isUsable()) Stmts.push_back(R.get()); } // Did not consume any extra semi. if (EndLoc.isInvalid()) return false; Diag(StartLoc, diag::warn_null_statement) << FixItHint::CreateRemoval(SourceRange(StartLoc, EndLoc)); return true; } StmtResult Parser::handleExprStmt(ExprResult E, ParsedStmtContext StmtCtx) { bool IsStmtExprResult = false; if ((StmtCtx & ParsedStmtContext::InStmtExpr) != ParsedStmtContext()) { // For GCC compatibility we skip past NullStmts. unsigned LookAhead = 0; while (GetLookAheadToken(LookAhead).is(tok::semi)) { ++LookAhead; } // Then look to see if the next two tokens close the statement expression; // if so, this expression statement is the last statement in a statement // expression. IsStmtExprResult = GetLookAheadToken(LookAhead).is(tok::r_brace) && GetLookAheadToken(LookAhead + 1).is(tok::r_paren); } if (IsStmtExprResult) E = Actions.ActOnStmtExprResult(E); return Actions.ActOnExprStmt(E, /*DiscardedValue=*/!IsStmtExprResult); } /// ParseCompoundStatementBody - Parse a sequence of statements optionally /// followed by a label and invoke the ActOnCompoundStmt action. This expects /// the '{' to be the current token, and consume the '}' at the end of the /// block. It does not manipulate the scope stack. StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) { PrettyStackTraceLoc CrashInfo(PP.getSourceManager(), Tok.getLocation(), "in compound statement ('{}')"); // Record the current FPFeatures, restore on leaving the // compound statement. Sema::FPFeaturesStateRAII SaveFPFeatures(Actions); InMessageExpressionRAIIObject InMessage(*this, false); BalancedDelimiterTracker T(*this, tok::l_brace); if (T.consumeOpen()) return StmtError(); Sema::CompoundScopeRAII CompoundScope(Actions, isStmtExpr); // Parse any pragmas at the beginning of the compound statement. ParseCompoundStatementLeadingPragmas(); Actions.ActOnAfterCompoundStatementLeadingPragmas(); StmtVector Stmts; // "__label__ X, Y, Z;" is the GNU "Local Label" extension. These are // only allowed at the start of a compound stmt regardless of the language. while (Tok.is(tok::kw___label__)) { SourceLocation LabelLoc = ConsumeToken(); SmallVector DeclsInGroup; while (true) { if (Tok.isNot(tok::identifier)) { Diag(Tok, diag::err_expected) << tok::identifier; break; } IdentifierInfo *II = Tok.getIdentifierInfo(); SourceLocation IdLoc = ConsumeToken(); DeclsInGroup.push_back(Actions.LookupOrCreateLabel(II, IdLoc, LabelLoc)); if (!TryConsumeToken(tok::comma)) break; } DeclSpec DS(AttrFactory); DeclGroupPtrTy Res = Actions.FinalizeDeclaratorGroup(getCurScope(), DS, DeclsInGroup); StmtResult R = Actions.ActOnDeclStmt(Res, LabelLoc, Tok.getLocation()); ExpectAndConsumeSemi(diag::err_expected_semi_declaration); if (R.isUsable()) Stmts.push_back(R.get()); } ParsedStmtContext SubStmtCtx = ParsedStmtContext::Compound | (isStmtExpr ? 
ParsedStmtContext::InStmtExpr : ParsedStmtContext()); while (!tryParseMisplacedModuleImport() && Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { if (Tok.is(tok::annot_pragma_unused)) { HandlePragmaUnused(); continue; } if (ConsumeNullStmt(Stmts)) continue; StmtResult R; if (Tok.isNot(tok::kw___extension__)) { R = ParseStatementOrDeclaration(Stmts, SubStmtCtx); } else { // __extension__ can start declarations and it can also be a unary // operator for expressions. Consume multiple __extension__ markers here // until we can determine which is which. // FIXME: This loses extension expressions in the AST! SourceLocation ExtLoc = ConsumeToken(); while (Tok.is(tok::kw___extension__)) ConsumeToken(); ParsedAttributes attrs(AttrFactory); MaybeParseCXX11Attributes(attrs, /*MightBeObjCMessageSend*/ true); // If this is the start of a declaration, parse it as such. if (isDeclarationStatement()) { // __extension__ silences extension warnings in the subdeclaration. // FIXME: Save the __extension__ on the decl as a node somehow? ExtensionRAIIObject O(Diags); SourceLocation DeclStart = Tok.getLocation(), DeclEnd; ParsedAttributes DeclSpecAttrs(AttrFactory); DeclGroupPtrTy Res = ParseDeclaration(DeclaratorContext::Block, DeclEnd, attrs, DeclSpecAttrs); R = Actions.ActOnDeclStmt(Res, DeclStart, DeclEnd); } else { // Otherwise this was a unary __extension__ marker. ExprResult Res(ParseExpressionWithLeadingExtension(ExtLoc)); if (Res.isInvalid()) { SkipUntil(tok::semi); continue; } // Eat the semicolon at the end of stmt and convert the expr into a // statement. ExpectAndConsumeSemi(diag::err_expected_semi_after_expr); R = handleExprStmt(Res, SubStmtCtx); if (R.isUsable()) R = Actions.ActOnAttributedStmt(attrs, R.get()); } } if (R.isUsable()) Stmts.push_back(R.get()); } // Warn the user that using option `-ffp-eval-method=source` on a // 32-bit target and feature `sse` disabled, or using // `pragma clang fp eval_method=source` and feature `sse` disabled, is not // supported. if (!PP.getTargetInfo().supportSourceEvalMethod() && (PP.getLastFPEvalPragmaLocation().isValid() || PP.getCurrentFPEvalMethod() == LangOptions::FPEvalMethodKind::FEM_Source)) Diag(Tok.getLocation(), diag::warn_no_support_for_eval_method_source_on_m32); SourceLocation CloseLoc = Tok.getLocation(); // We broke out of the while loop because we found a '}' or EOF. if (!T.consumeClose()) { // If this is the '})' of a statement expression, check that it's written // in a sensible way. if (isStmtExpr && Tok.is(tok::r_paren)) checkCompoundToken(CloseLoc, tok::r_brace, CompoundToken::StmtExprEnd); } else { // Recover by creating a compound statement with what we parsed so far, // instead of dropping everything and returning StmtError(). } if (T.getCloseLocation().isValid()) CloseLoc = T.getCloseLocation(); return Actions.ActOnCompoundStmt(T.getOpenLocation(), CloseLoc, Stmts, isStmtExpr); } /// ParseParenExprOrCondition: /// [C ] '(' expression ')' /// [C++] '(' condition ')' /// [C++1z] '(' init-statement[opt] condition ')' /// /// This function parses and performs error recovery on the specified condition /// or expression (depending on whether we're in C++ or C mode). This function /// goes out of its way to recover well. It returns true if there was a parser /// error (the right paren couldn't be found), which indicates that the caller /// should try to recover harder. It returns false if the condition is /// successfully parsed. Note that a successful parse can still have semantic /// errors in the condition. 
/// Additionally, it will assign the location of the outer-most '(' and ')', /// to LParenLoc and RParenLoc, respectively. bool Parser::ParseParenExprOrCondition(StmtResult *InitStmt, Sema::ConditionResult &Cond, SourceLocation Loc, Sema::ConditionKind CK, SourceLocation &LParenLoc, SourceLocation &RParenLoc) { BalancedDelimiterTracker T(*this, tok::l_paren); T.consumeOpen(); SourceLocation Start = Tok.getLocation(); if (getLangOpts().CPlusPlus) { Cond = ParseCXXCondition(InitStmt, Loc, CK, false); } else { ExprResult CondExpr = ParseExpression(); // If required, convert to a boolean value. if (CondExpr.isInvalid()) Cond = Sema::ConditionError(); else Cond = Actions.ActOnCondition(getCurScope(), Loc, CondExpr.get(), CK, /*MissingOK=*/false); } // If the parser was confused by the condition and we don't have a ')', try to // recover by skipping ahead to a semi and bailing out. If condexp is // semantically invalid but we have well formed code, keep going. if (Cond.isInvalid() && Tok.isNot(tok::r_paren)) { SkipUntil(tok::semi); // Skipping may have stopped if it found the containing ')'. If so, we can // continue parsing the if statement. if (Tok.isNot(tok::r_paren)) return true; } if (Cond.isInvalid()) { ExprResult CondExpr = Actions.CreateRecoveryExpr( Start, Tok.getLocation() == Start ? Start : PrevTokLocation, {}, Actions.PreferredConditionType(CK)); if (!CondExpr.isInvalid()) Cond = Actions.ActOnCondition(getCurScope(), Loc, CondExpr.get(), CK, /*MissingOK=*/false); } // Either the condition is valid or the rparen is present. T.consumeClose(); LParenLoc = T.getOpenLocation(); RParenLoc = T.getCloseLocation(); // Check for extraneous ')'s to catch things like "if (foo())) {". We know // that all callers are looking for a statement after the condition, so ")" // isn't valid. while (Tok.is(tok::r_paren)) { Diag(Tok, diag::err_extraneous_rparen_in_condition) << FixItHint::CreateRemoval(Tok.getLocation()); ConsumeParen(); } return false; } namespace { enum MisleadingStatementKind { MSK_if, MSK_else, MSK_for, MSK_while }; struct MisleadingIndentationChecker { Parser &P; SourceLocation StmtLoc; SourceLocation PrevLoc; unsigned NumDirectives; MisleadingStatementKind Kind; bool ShouldSkip; MisleadingIndentationChecker(Parser &P, MisleadingStatementKind K, SourceLocation SL) : P(P), StmtLoc(SL), PrevLoc(P.getCurToken().getLocation()), NumDirectives(P.getPreprocessor().getNumDirectives()), Kind(K), ShouldSkip(P.getCurToken().is(tok::l_brace)) { if (!P.MisleadingIndentationElseLoc.isInvalid()) { StmtLoc = P.MisleadingIndentationElseLoc; P.MisleadingIndentationElseLoc = SourceLocation(); } if (Kind == MSK_else && !ShouldSkip) P.MisleadingIndentationElseLoc = SL; } /// Compute the column number while aligning tabs on TabStop (-ftabstop); this /// gives the visual indentation of the SourceLocation. static unsigned getVisualIndentation(SourceManager &SM, SourceLocation Loc) { unsigned TabStop = SM.getDiagnostics().getDiagnosticOptions().TabStop; unsigned ColNo = SM.getSpellingColumnNumber(Loc); if (ColNo == 0 || TabStop == 1) return ColNo; std::pair<FileID, unsigned> FIDAndOffset = SM.getDecomposedLoc(Loc); bool Invalid; StringRef BufData = SM.getBufferData(FIDAndOffset.first, &Invalid); if (Invalid) return 0; const char *EndPos = BufData.data() + FIDAndOffset.second; // File offsets are 0-based and column numbers are 1-based. assert(FIDAndOffset.second + 1 >= ColNo && "Column number smaller than file offset?"); unsigned VisualColumn = 0; // Stored as 0-based column, here.
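// Worked example (illustrative): with TabStop == 8 and a line that starts
// with "\t\tif", the loop below expands the two tabs 0 -> 8 -> 16, so the
// 0-based visual column of the 'i' is 16 and the function returns the
// 1-based column 17.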
// Loop from beginning of line up to Loc's file position, counting columns, // expanding tabs. for (const char *CurPos = EndPos - (ColNo - 1); CurPos != EndPos; ++CurPos) { if (*CurPos == '\t') // Advance visual column to next tabstop. VisualColumn += (TabStop - VisualColumn % TabStop); else VisualColumn++; } return VisualColumn + 1; } void Check() { Token Tok = P.getCurToken(); if (P.getActions().getDiagnostics().isIgnored( diag::warn_misleading_indentation, Tok.getLocation()) || ShouldSkip || NumDirectives != P.getPreprocessor().getNumDirectives() || Tok.isOneOf(tok::semi, tok::r_brace) || Tok.isAnnotation() || Tok.getLocation().isMacroID() || PrevLoc.isMacroID() || StmtLoc.isMacroID() || (Kind == MSK_else && P.MisleadingIndentationElseLoc.isInvalid())) { P.MisleadingIndentationElseLoc = SourceLocation(); return; } if (Kind == MSK_else) P.MisleadingIndentationElseLoc = SourceLocation(); SourceManager &SM = P.getPreprocessor().getSourceManager(); unsigned PrevColNum = getVisualIndentation(SM, PrevLoc); unsigned CurColNum = getVisualIndentation(SM, Tok.getLocation()); unsigned StmtColNum = getVisualIndentation(SM, StmtLoc); if (PrevColNum != 0 && CurColNum != 0 && StmtColNum != 0 && ((PrevColNum > StmtColNum && PrevColNum == CurColNum) || !Tok.isAtStartOfLine()) && SM.getPresumedLineNumber(StmtLoc) != SM.getPresumedLineNumber(Tok.getLocation()) && (Tok.isNot(tok::identifier) || P.getPreprocessor().LookAhead(0).isNot(tok::colon))) { P.Diag(Tok.getLocation(), diag::warn_misleading_indentation) << Kind; P.Diag(StmtLoc, diag::note_previous_statement); } } }; } /// ParseIfStatement /// if-statement: [C99 6.8.4.1] /// 'if' '(' expression ')' statement /// 'if' '(' expression ')' statement 'else' statement /// [C++] 'if' '(' condition ')' statement /// [C++] 'if' '(' condition ')' statement 'else' statement /// [C++23] 'if' '!' [opt] consteval compound-statement /// [C++23] 'if' '!' [opt] consteval compound-statement 'else' statement /// StmtResult Parser::ParseIfStatement(SourceLocation *TrailingElseLoc) { assert(Tok.is(tok::kw_if) && "Not an if stmt!"); SourceLocation IfLoc = ConsumeToken(); // eat the 'if'. bool IsConstexpr = false; bool IsConsteval = false; SourceLocation NotLocation; SourceLocation ConstevalLoc; if (Tok.is(tok::kw_constexpr)) { - Diag(Tok, getLangOpts().CPlusPlus17 ? diag::warn_cxx14_compat_constexpr_if - : diag::ext_constexpr_if); - IsConstexpr = true; - ConsumeToken(); + // C23 supports constexpr keyword, but only for object definitions. + if (getLangOpts().CPlusPlus) { + Diag(Tok, getLangOpts().CPlusPlus17 ? diag::warn_cxx14_compat_constexpr_if + : diag::ext_constexpr_if); + IsConstexpr = true; + ConsumeToken(); + } } else { if (Tok.is(tok::exclaim)) { NotLocation = ConsumeToken(); } if (Tok.is(tok::kw_consteval)) { Diag(Tok, getLangOpts().CPlusPlus23 ? diag::warn_cxx20_compat_consteval_if : diag::ext_consteval_if); IsConsteval = true; ConstevalLoc = ConsumeToken(); } } if (!IsConsteval && (NotLocation.isValid() || Tok.isNot(tok::l_paren))) { Diag(Tok, diag::err_expected_lparen_after) << "if"; SkipUntil(tok::semi); return StmtError(); } bool C99orCXX = getLangOpts().C99 || getLangOpts().CPlusPlus; // C99 6.8.4p3 - In C99, the if statement is a block. This is not // the case for C90. // // C++ 6.4p3: // A name introduced by a declaration in a condition is in scope from its // point of declaration until the end of the substatements controlled by the // condition. 
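// Illustration for the change above (hypothetical C23 input): 'constexpr' is
// a keyword in C23, but only for object definitions, so "if constexpr" is no
// longer routed to the C++ constexpr-if path when compiling C; it falls
// through to the err_expected_lparen_after << "if" diagnostic instead.
#if 0
void g(int x) {
  if constexpr (x) {}                       // C23: error, expected '(' after 'if'
  if (x) { constexpr int k = 1; (void)k; }  // C23: fine, constexpr object definition
}
#endif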
// C++ 3.3.2p4: // Names declared in the for-init-statement, and in the condition of if, // while, for, and switch statements are local to the if, while, for, or // switch statement (including the controlled statement). // ParseScope IfScope(this, Scope::DeclScope | Scope::ControlScope, C99orCXX); // Parse the condition. StmtResult InitStmt; Sema::ConditionResult Cond; SourceLocation LParen; SourceLocation RParen; std::optional ConstexprCondition; if (!IsConsteval) { if (ParseParenExprOrCondition(&InitStmt, Cond, IfLoc, IsConstexpr ? Sema::ConditionKind::ConstexprIf : Sema::ConditionKind::Boolean, LParen, RParen)) return StmtError(); if (IsConstexpr) ConstexprCondition = Cond.getKnownValue(); } bool IsBracedThen = Tok.is(tok::l_brace); // C99 6.8.4p3 - In C99, the body of the if statement is a scope, even if // there is no compound stmt. C90 does not have this clause. We only do this // if the body isn't a compound statement to avoid push/pop in common cases. // // C++ 6.4p1: // The substatement in a selection-statement (each substatement, in the else // form of the if statement) implicitly defines a local scope. // // For C++ we create a scope for the condition and a new scope for // substatements because: // -When the 'then' scope exits, we want the condition declaration to still be // active for the 'else' scope too. // -Sema will detect name clashes by considering declarations of a // 'ControlScope' as part of its direct subscope. // -If we wanted the condition and substatement to be in the same scope, we // would have to notify ParseStatement not to create a new scope. It's // simpler to let it create a new scope. // ParseScope InnerScope(this, Scope::DeclScope, C99orCXX, IsBracedThen); MisleadingIndentationChecker MIChecker(*this, MSK_if, IfLoc); // Read the 'then' stmt. SourceLocation ThenStmtLoc = Tok.getLocation(); SourceLocation InnerStatementTrailingElseLoc; StmtResult ThenStmt; { bool ShouldEnter = ConstexprCondition && !*ConstexprCondition; Sema::ExpressionEvaluationContext Context = Sema::ExpressionEvaluationContext::DiscardedStatement; if (NotLocation.isInvalid() && IsConsteval) { Context = Sema::ExpressionEvaluationContext::ImmediateFunctionContext; ShouldEnter = true; } EnterExpressionEvaluationContext PotentiallyDiscarded( Actions, Context, nullptr, Sema::ExpressionEvaluationContextRecord::EK_Other, ShouldEnter); ThenStmt = ParseStatement(&InnerStatementTrailingElseLoc); } if (Tok.isNot(tok::kw_else)) MIChecker.Check(); // Pop the 'if' scope if needed. InnerScope.Exit(); // If it has an else, parse it. SourceLocation ElseLoc; SourceLocation ElseStmtLoc; StmtResult ElseStmt; if (Tok.is(tok::kw_else)) { if (TrailingElseLoc) *TrailingElseLoc = Tok.getLocation(); ElseLoc = ConsumeToken(); ElseStmtLoc = Tok.getLocation(); // C99 6.8.4p3 - In C99, the body of the if statement is a scope, even if // there is no compound stmt. C90 does not have this clause. We only do // this if the body isn't a compound statement to avoid push/pop in common // cases. // // C++ 6.4p1: // The substatement in a selection-statement (each substatement, in the else // form of the if statement) implicitly defines a local scope. 
// ParseScope InnerScope(this, Scope::DeclScope, C99orCXX, Tok.is(tok::l_brace)); MisleadingIndentationChecker MIChecker(*this, MSK_else, ElseLoc); bool ShouldEnter = ConstexprCondition && *ConstexprCondition; Sema::ExpressionEvaluationContext Context = Sema::ExpressionEvaluationContext::DiscardedStatement; if (NotLocation.isValid() && IsConsteval) { Context = Sema::ExpressionEvaluationContext::ImmediateFunctionContext; ShouldEnter = true; } EnterExpressionEvaluationContext PotentiallyDiscarded( Actions, Context, nullptr, Sema::ExpressionEvaluationContextRecord::EK_Other, ShouldEnter); ElseStmt = ParseStatement(); if (ElseStmt.isUsable()) MIChecker.Check(); // Pop the 'else' scope if needed. InnerScope.Exit(); } else if (Tok.is(tok::code_completion)) { cutOffParsing(); Actions.CodeCompletion().CodeCompleteAfterIf(getCurScope(), IsBracedThen); return StmtError(); } else if (InnerStatementTrailingElseLoc.isValid()) { Diag(InnerStatementTrailingElseLoc, diag::warn_dangling_else); } IfScope.Exit(); // If the then or else stmt is invalid and the other is valid (and present), // turn the invalid one into a null stmt to avoid dropping the other // part. If both are invalid, return error. if ((ThenStmt.isInvalid() && ElseStmt.isInvalid()) || (ThenStmt.isInvalid() && ElseStmt.get() == nullptr) || (ThenStmt.get() == nullptr && ElseStmt.isInvalid())) { // Both invalid, or one is invalid and other is non-present: return error. return StmtError(); } if (IsConsteval) { auto IsCompoundStatement = [](const Stmt *S) { if (const auto *Outer = dyn_cast_if_present(S)) S = Outer->getSubStmt(); return isa_and_nonnull(S); }; if (!IsCompoundStatement(ThenStmt.get())) { Diag(ConstevalLoc, diag::err_expected_after) << "consteval" << "{"; return StmtError(); } if (!ElseStmt.isUnset() && !IsCompoundStatement(ElseStmt.get())) { Diag(ElseLoc, diag::err_expected_after) << "else" << "{"; return StmtError(); } } // Now if either are invalid, replace with a ';'. if (ThenStmt.isInvalid()) ThenStmt = Actions.ActOnNullStmt(ThenStmtLoc); if (ElseStmt.isInvalid()) ElseStmt = Actions.ActOnNullStmt(ElseStmtLoc); IfStatementKind Kind = IfStatementKind::Ordinary; if (IsConstexpr) Kind = IfStatementKind::Constexpr; else if (IsConsteval) Kind = NotLocation.isValid() ? IfStatementKind::ConstevalNegated : IfStatementKind::ConstevalNonNegated; return Actions.ActOnIfStmt(IfLoc, Kind, LParen, InitStmt.get(), Cond, RParen, ThenStmt.get(), ElseLoc, ElseStmt.get()); } /// ParseSwitchStatement /// switch-statement: /// 'switch' '(' expression ')' statement /// [C++] 'switch' '(' condition ')' statement StmtResult Parser::ParseSwitchStatement(SourceLocation *TrailingElseLoc) { assert(Tok.is(tok::kw_switch) && "Not a switch stmt!"); SourceLocation SwitchLoc = ConsumeToken(); // eat the 'switch'. if (Tok.isNot(tok::l_paren)) { Diag(Tok, diag::err_expected_lparen_after) << "switch"; SkipUntil(tok::semi); return StmtError(); } bool C99orCXX = getLangOpts().C99 || getLangOpts().CPlusPlus; // C99 6.8.4p3 - In C99, the switch statement is a block. This is // not the case for C90. Start the switch scope. // // C++ 6.4p3: // A name introduced by a declaration in a condition is in scope from its // point of declaration until the end of the substatements controlled by the // condition. // C++ 3.3.2p4: // Names declared in the for-init-statement, and in the condition of if, // while, for, and switch statements are local to the if, while, for, or // switch statement (including the controlled statement). 
// unsigned ScopeFlags = Scope::SwitchScope; if (C99orCXX) ScopeFlags |= Scope::DeclScope | Scope::ControlScope; ParseScope SwitchScope(this, ScopeFlags); // Parse the condition. StmtResult InitStmt; Sema::ConditionResult Cond; SourceLocation LParen; SourceLocation RParen; if (ParseParenExprOrCondition(&InitStmt, Cond, SwitchLoc, Sema::ConditionKind::Switch, LParen, RParen)) return StmtError(); StmtResult Switch = Actions.ActOnStartOfSwitchStmt( SwitchLoc, LParen, InitStmt.get(), Cond, RParen); if (Switch.isInvalid()) { // Skip the switch body. // FIXME: This is not optimal recovery, but parsing the body is more // dangerous due to the presence of case and default statements, which // will have no place to connect back with the switch. if (Tok.is(tok::l_brace)) { ConsumeBrace(); SkipUntil(tok::r_brace); } else SkipUntil(tok::semi); return Switch; } // C99 6.8.4p3 - In C99, the body of the switch statement is a scope, even if // there is no compound stmt. C90 does not have this clause. We only do this // if the body isn't a compound statement to avoid push/pop in common cases. // // C++ 6.4p1: // The substatement in a selection-statement (each substatement, in the else // form of the if statement) implicitly defines a local scope. // // See comments in ParseIfStatement for why we create a scope for the // condition and a new scope for substatement in C++. // getCurScope()->AddFlags(Scope::BreakScope); ParseScope InnerScope(this, Scope::DeclScope, C99orCXX, Tok.is(tok::l_brace)); // We have incremented the mangling number for the SwitchScope and the // InnerScope, which is one too many. if (C99orCXX) getCurScope()->decrementMSManglingNumber(); // Read the body statement. StmtResult Body(ParseStatement(TrailingElseLoc)); // Pop the scopes. InnerScope.Exit(); SwitchScope.Exit(); return Actions.ActOnFinishSwitchStmt(SwitchLoc, Switch.get(), Body.get()); } /// ParseWhileStatement /// while-statement: [C99 6.8.5.1] /// 'while' '(' expression ')' statement /// [C++] 'while' '(' condition ')' statement StmtResult Parser::ParseWhileStatement(SourceLocation *TrailingElseLoc) { assert(Tok.is(tok::kw_while) && "Not a while stmt!"); SourceLocation WhileLoc = Tok.getLocation(); ConsumeToken(); // eat the 'while'. if (Tok.isNot(tok::l_paren)) { Diag(Tok, diag::err_expected_lparen_after) << "while"; SkipUntil(tok::semi); return StmtError(); } bool C99orCXX = getLangOpts().C99 || getLangOpts().CPlusPlus; // C99 6.8.5p5 - In C99, the while statement is a block. This is not // the case for C90. Start the loop scope. // // C++ 6.4p3: // A name introduced by a declaration in a condition is in scope from its // point of declaration until the end of the substatements controlled by the // condition. // C++ 3.3.2p4: // Names declared in the for-init-statement, and in the condition of if, // while, for, and switch statements are local to the if, while, for, or // switch statement (including the controlled statement). // unsigned ScopeFlags; if (C99orCXX) ScopeFlags = Scope::BreakScope | Scope::ContinueScope | Scope::DeclScope | Scope::ControlScope; else ScopeFlags = Scope::BreakScope | Scope::ContinueScope; ParseScope WhileScope(this, ScopeFlags); // Parse the condition. Sema::ConditionResult Cond; SourceLocation LParen; SourceLocation RParen; if (ParseParenExprOrCondition(nullptr, Cond, WhileLoc, Sema::ConditionKind::Boolean, LParen, RParen)) return StmtError(); // C99 6.8.5p5 - In C99, the body of the while statement is a scope, even if // there is no compound stmt. C90 does not have this clause. 
We only do this // if the body isn't a compound statement to avoid push/pop in common cases. // // C++ 6.5p2: // The substatement in an iteration-statement implicitly defines a local scope // which is entered and exited each time through the loop. // // See comments in ParseIfStatement for why we create a scope for the // condition and a new scope for substatement in C++. // ParseScope InnerScope(this, Scope::DeclScope, C99orCXX, Tok.is(tok::l_brace)); MisleadingIndentationChecker MIChecker(*this, MSK_while, WhileLoc); // Read the body statement. StmtResult Body(ParseStatement(TrailingElseLoc)); if (Body.isUsable()) MIChecker.Check(); // Pop the body scope if needed. InnerScope.Exit(); WhileScope.Exit(); if (Cond.isInvalid() || Body.isInvalid()) return StmtError(); return Actions.ActOnWhileStmt(WhileLoc, LParen, Cond, RParen, Body.get()); } /// ParseDoStatement /// do-statement: [C99 6.8.5.2] /// 'do' statement 'while' '(' expression ')' ';' /// Note: this lets the caller parse the end ';'. StmtResult Parser::ParseDoStatement() { assert(Tok.is(tok::kw_do) && "Not a do stmt!"); SourceLocation DoLoc = ConsumeToken(); // eat the 'do'. // C99 6.8.5p5 - In C99, the do statement is a block. This is not // the case for C90. Start the loop scope. unsigned ScopeFlags; if (getLangOpts().C99) ScopeFlags = Scope::BreakScope | Scope::ContinueScope | Scope::DeclScope; else ScopeFlags = Scope::BreakScope | Scope::ContinueScope; ParseScope DoScope(this, ScopeFlags); // C99 6.8.5p5 - In C99, the body of the do statement is a scope, even if // there is no compound stmt. C90 does not have this clause. We only do this // if the body isn't a compound statement to avoid push/pop in common cases. // // C++ 6.5p2: // The substatement in an iteration-statement implicitly defines a local scope // which is entered and exited each time through the loop. // bool C99orCXX = getLangOpts().C99 || getLangOpts().CPlusPlus; ParseScope InnerScope(this, Scope::DeclScope, C99orCXX, Tok.is(tok::l_brace)); // Read the body statement. StmtResult Body(ParseStatement()); // Pop the body scope if needed. InnerScope.Exit(); if (Tok.isNot(tok::kw_while)) { if (!Body.isInvalid()) { Diag(Tok, diag::err_expected_while); Diag(DoLoc, diag::note_matching) << "'do'"; SkipUntil(tok::semi, StopBeforeMatch); } return StmtError(); } SourceLocation WhileLoc = ConsumeToken(); if (Tok.isNot(tok::l_paren)) { Diag(Tok, diag::err_expected_lparen_after) << "do/while"; SkipUntil(tok::semi, StopBeforeMatch); return StmtError(); } // Parse the parenthesized expression. BalancedDelimiterTracker T(*this, tok::l_paren); T.consumeOpen(); // A do-while expression is not a condition, so can't have attributes. DiagnoseAndSkipCXX11Attributes(); SourceLocation Start = Tok.getLocation(); ExprResult Cond = ParseExpression(); // Correct the typos in condition before closing the scope. if (Cond.isUsable()) Cond = Actions.CorrectDelayedTyposInExpr(Cond, /*InitDecl=*/nullptr, /*RecoverUncorrectedTypos=*/true); else { if (!Tok.isOneOf(tok::r_paren, tok::r_square, tok::r_brace)) SkipUntil(tok::semi); Cond = Actions.CreateRecoveryExpr( Start, Start == Tok.getLocation() ? 
Start : PrevTokLocation, {}, Actions.getASTContext().BoolTy); } T.consumeClose(); DoScope.Exit(); if (Cond.isInvalid() || Body.isInvalid()) return StmtError(); return Actions.ActOnDoStmt(DoLoc, Body.get(), WhileLoc, T.getOpenLocation(), Cond.get(), T.getCloseLocation()); } bool Parser::isForRangeIdentifier() { assert(Tok.is(tok::identifier)); const Token &Next = NextToken(); if (Next.is(tok::colon)) return true; if (Next.isOneOf(tok::l_square, tok::kw_alignas)) { TentativeParsingAction PA(*this); ConsumeToken(); SkipCXX11Attributes(); bool Result = Tok.is(tok::colon); PA.Revert(); return Result; } return false; } /// ParseForStatement /// for-statement: [C99 6.8.5.3] /// 'for' '(' expr[opt] ';' expr[opt] ';' expr[opt] ')' statement /// 'for' '(' declaration expr[opt] ';' expr[opt] ')' statement /// [C++] 'for' '(' for-init-statement condition[opt] ';' expression[opt] ')' /// [C++] statement /// [C++0x] 'for' /// 'co_await'[opt] [Coroutines] /// '(' for-range-declaration ':' for-range-initializer ')' /// statement /// [OBJC2] 'for' '(' declaration 'in' expr ')' statement /// [OBJC2] 'for' '(' expr 'in' expr ')' statement /// /// [C++] for-init-statement: /// [C++] expression-statement /// [C++] simple-declaration /// [C++23] alias-declaration /// /// [C++0x] for-range-declaration: /// [C++0x] attribute-specifier-seq[opt] type-specifier-seq declarator /// [C++0x] for-range-initializer: /// [C++0x] expression /// [C++0x] braced-init-list [TODO] StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) { assert(Tok.is(tok::kw_for) && "Not a for stmt!"); SourceLocation ForLoc = ConsumeToken(); // eat the 'for'. SourceLocation CoawaitLoc; if (Tok.is(tok::kw_co_await)) CoawaitLoc = ConsumeToken(); if (Tok.isNot(tok::l_paren)) { Diag(Tok, diag::err_expected_lparen_after) << "for"; SkipUntil(tok::semi); return StmtError(); } bool C99orCXXorObjC = getLangOpts().C99 || getLangOpts().CPlusPlus || getLangOpts().ObjC; // C99 6.8.5p5 - In C99, the for statement is a block. This is not // the case for C90. Start the loop scope. // // C++ 6.4p3: // A name introduced by a declaration in a condition is in scope from its // point of declaration until the end of the substatements controlled by the // condition. // C++ 3.3.2p4: // Names declared in the for-init-statement, and in the condition of if, // while, for, and switch statements are local to the if, while, for, or // switch statement (including the controlled statement). // C++ 6.5.3p1: // Names declared in the for-init-statement are in the same declarative-region // as those declared in the condition. // unsigned ScopeFlags = 0; if (C99orCXXorObjC) ScopeFlags = Scope::DeclScope | Scope::ControlScope; ParseScope ForScope(this, ScopeFlags); BalancedDelimiterTracker T(*this, tok::l_paren); T.consumeOpen(); ExprResult Value; bool ForEach = false; StmtResult FirstPart; Sema::ConditionResult SecondPart; ExprResult Collection; ForRangeInfo ForRangeInfo; FullExprArg ThirdPart(Actions); if (Tok.is(tok::code_completion)) { cutOffParsing(); Actions.CodeCompletion().CodeCompleteOrdinaryName( getCurScope(), C99orCXXorObjC ? SemaCodeCompletion::PCC_ForInit : SemaCodeCompletion::PCC_Expression); return StmtError(); } ParsedAttributes attrs(AttrFactory); MaybeParseCXX11Attributes(attrs); SourceLocation EmptyInitStmtSemiLoc; // Parse the first part of the for specifier. if (Tok.is(tok::semi)) { // for (; ProhibitAttributes(attrs); // no first part, eat the ';'. 
SourceLocation SemiLoc = Tok.getLocation(); if (!Tok.hasLeadingEmptyMacro() && !SemiLoc.isMacroID()) EmptyInitStmtSemiLoc = SemiLoc; ConsumeToken(); } else if (getLangOpts().CPlusPlus && Tok.is(tok::identifier) && isForRangeIdentifier()) { ProhibitAttributes(attrs); IdentifierInfo *Name = Tok.getIdentifierInfo(); SourceLocation Loc = ConsumeToken(); MaybeParseCXX11Attributes(attrs); ForRangeInfo.ColonLoc = ConsumeToken(); if (Tok.is(tok::l_brace)) ForRangeInfo.RangeExpr = ParseBraceInitializer(); else ForRangeInfo.RangeExpr = ParseExpression(); Diag(Loc, diag::err_for_range_identifier) << ((getLangOpts().CPlusPlus11 && !getLangOpts().CPlusPlus17) ? FixItHint::CreateInsertion(Loc, "auto &&") : FixItHint()); ForRangeInfo.LoopVar = Actions.ActOnCXXForRangeIdentifier(getCurScope(), Loc, Name, attrs); } else if (isForInitDeclaration()) { // for (int X = 4; ParenBraceBracketBalancer BalancerRAIIObj(*this); // Parse declaration, which eats the ';'. if (!C99orCXXorObjC) { // Use of C99-style for loops in C90 mode? Diag(Tok, diag::ext_c99_variable_decl_in_for_loop); Diag(Tok, diag::warn_gcc_variable_decl_in_for_loop); } DeclGroupPtrTy DG; SourceLocation DeclStart = Tok.getLocation(), DeclEnd; if (Tok.is(tok::kw_using)) { DG = ParseAliasDeclarationInInitStatement(DeclaratorContext::ForInit, attrs); FirstPart = Actions.ActOnDeclStmt(DG, DeclStart, Tok.getLocation()); } else { // In C++0x, "for (T NS:a" might not be a typo for :: bool MightBeForRangeStmt = getLangOpts().CPlusPlus; ColonProtectionRAIIObject ColonProtection(*this, MightBeForRangeStmt); ParsedAttributes DeclSpecAttrs(AttrFactory); DG = ParseSimpleDeclaration( DeclaratorContext::ForInit, DeclEnd, attrs, DeclSpecAttrs, false, MightBeForRangeStmt ? &ForRangeInfo : nullptr); FirstPart = Actions.ActOnDeclStmt(DG, DeclStart, Tok.getLocation()); if (ForRangeInfo.ParsedForRangeDecl()) { Diag(ForRangeInfo.ColonLoc, getLangOpts().CPlusPlus11 ? diag::warn_cxx98_compat_for_range : diag::ext_for_range); ForRangeInfo.LoopVar = FirstPart; FirstPart = StmtResult(); } else if (Tok.is(tok::semi)) { // for (int x = 4; ConsumeToken(); } else if ((ForEach = isTokIdentifier_in())) { Actions.ActOnForEachDeclStmt(DG); // ObjC: for (id x in expr) ConsumeToken(); // consume 'in' if (Tok.is(tok::code_completion)) { cutOffParsing(); Actions.CodeCompletion().CodeCompleteObjCForCollection(getCurScope(), DG); return StmtError(); } Collection = ParseExpression(); } else { Diag(Tok, diag::err_expected_semi_for); } } } else { ProhibitAttributes(attrs); Value = Actions.CorrectDelayedTyposInExpr(ParseExpression()); ForEach = isTokIdentifier_in(); // Turn the expression into a stmt. if (!Value.isInvalid()) { if (ForEach) FirstPart = Actions.ActOnForEachLValueExpr(Value.get()); else { // We already know this is not an init-statement within a for loop, so // if we are parsing a C++11 range-based for loop, we should treat this // expression statement as being a discarded value expression because // we will err below. 
This way we do not warn on an unused expression // that was an error in the first place, like with: for (expr : expr); bool IsRangeBasedFor = getLangOpts().CPlusPlus11 && !ForEach && Tok.is(tok::colon); FirstPart = Actions.ActOnExprStmt(Value, !IsRangeBasedFor); } } if (Tok.is(tok::semi)) { ConsumeToken(); } else if (ForEach) { ConsumeToken(); // consume 'in' if (Tok.is(tok::code_completion)) { cutOffParsing(); Actions.CodeCompletion().CodeCompleteObjCForCollection(getCurScope(), nullptr); return StmtError(); } Collection = ParseExpression(); } else if (getLangOpts().CPlusPlus11 && Tok.is(tok::colon) && FirstPart.get()) { // User tried to write the reasonable, but ill-formed, for-range-statement // for (expr : expr) { ... } Diag(Tok, diag::err_for_range_expected_decl) << FirstPart.get()->getSourceRange(); SkipUntil(tok::r_paren, StopBeforeMatch); SecondPart = Sema::ConditionError(); } else { if (!Value.isInvalid()) { Diag(Tok, diag::err_expected_semi_for); } else { // Skip until semicolon or rparen, don't consume it. SkipUntil(tok::r_paren, StopAtSemi | StopBeforeMatch); if (Tok.is(tok::semi)) ConsumeToken(); } } } // Parse the second part of the for specifier. if (!ForEach && !ForRangeInfo.ParsedForRangeDecl() && !SecondPart.isInvalid()) { // Parse the second part of the for specifier. if (Tok.is(tok::semi)) { // for (...;; // no second part. } else if (Tok.is(tok::r_paren)) { // missing both semicolons. } else { if (getLangOpts().CPlusPlus) { // C++2a: We've parsed an init-statement; we might have a // for-range-declaration next. bool MightBeForRangeStmt = !ForRangeInfo.ParsedForRangeDecl(); ColonProtectionRAIIObject ColonProtection(*this, MightBeForRangeStmt); SourceLocation SecondPartStart = Tok.getLocation(); Sema::ConditionKind CK = Sema::ConditionKind::Boolean; SecondPart = ParseCXXCondition( /*InitStmt=*/nullptr, ForLoc, CK, // FIXME: recovery if we don't see another semi! /*MissingOK=*/true, MightBeForRangeStmt ? &ForRangeInfo : nullptr, /*EnterForConditionScope=*/true); if (ForRangeInfo.ParsedForRangeDecl()) { Diag(FirstPart.get() ? FirstPart.get()->getBeginLoc() : ForRangeInfo.ColonLoc, getLangOpts().CPlusPlus20 ? diag::warn_cxx17_compat_for_range_init_stmt : diag::ext_for_range_init_stmt) << (FirstPart.get() ? FirstPart.get()->getSourceRange() : SourceRange()); if (EmptyInitStmtSemiLoc.isValid()) { Diag(EmptyInitStmtSemiLoc, diag::warn_empty_init_statement) << /*for-loop*/ 2 << FixItHint::CreateRemoval(EmptyInitStmtSemiLoc); } } if (SecondPart.isInvalid()) { ExprResult CondExpr = Actions.CreateRecoveryExpr( SecondPartStart, Tok.getLocation() == SecondPartStart ? SecondPartStart : PrevTokLocation, {}, Actions.PreferredConditionType(CK)); if (!CondExpr.isInvalid()) SecondPart = Actions.ActOnCondition(getCurScope(), ForLoc, CondExpr.get(), CK, /*MissingOK=*/false); } } else { // We permit 'continue' and 'break' in the condition of a for loop. getCurScope()->AddFlags(Scope::BreakScope | Scope::ContinueScope); ExprResult SecondExpr = ParseExpression(); if (SecondExpr.isInvalid()) SecondPart = Sema::ConditionError(); else SecondPart = Actions.ActOnCondition( getCurScope(), ForLoc, SecondExpr.get(), Sema::ConditionKind::Boolean, /*MissingOK=*/true); } } } // Enter a break / continue scope, if we didn't already enter one while // parsing the second part. if (!getCurScope()->isContinueScope()) getCurScope()->AddFlags(Scope::BreakScope | Scope::ContinueScope); // Parse the third part of the for statement. 
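// Illustrative sketch (editor's note, not part of this patch; names are
// hypothetical): the for-range-after-init-statement path above accepts the
// C++20 form, while a bare "for (expr : expr)" is diagnosed as needing a
// for-range-declaration:
//
//   #include <vector>
//   int sum(const std::vector<int> &V) {
//     int Total = 0;
//     for (auto Size = V.size(); int X : V)   // C++20: init-statement, then range
//       Total += X + static_cast<int>(Size);
//     return Total;
//   }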
if (!ForEach && !ForRangeInfo.ParsedForRangeDecl()) { if (Tok.isNot(tok::semi)) { if (!SecondPart.isInvalid()) Diag(Tok, diag::err_expected_semi_for); SkipUntil(tok::r_paren, StopAtSemi | StopBeforeMatch); } if (Tok.is(tok::semi)) { ConsumeToken(); } if (Tok.isNot(tok::r_paren)) { // for (...;...;) ExprResult Third = ParseExpression(); // FIXME: The C++11 standard doesn't actually say that this is a // discarded-value expression, but it clearly should be. ThirdPart = Actions.MakeFullDiscardedValueExpr(Third.get()); } } // Match the ')'. T.consumeClose(); // C++ Coroutines [stmt.iter]: // 'co_await' can only be used for a range-based for statement. if (CoawaitLoc.isValid() && !ForRangeInfo.ParsedForRangeDecl()) { Diag(CoawaitLoc, diag::err_for_co_await_not_range_for); CoawaitLoc = SourceLocation(); } if (CoawaitLoc.isValid() && getLangOpts().CPlusPlus20) Diag(CoawaitLoc, diag::warn_deprecated_for_co_await); // We need to perform most of the semantic analysis for a C++0x for-range // statememt before parsing the body, in order to be able to deduce the type // of an auto-typed loop variable. StmtResult ForRangeStmt; StmtResult ForEachStmt; if (ForRangeInfo.ParsedForRangeDecl()) { ExprResult CorrectedRange = Actions.CorrectDelayedTyposInExpr(ForRangeInfo.RangeExpr.get()); ForRangeStmt = Actions.ActOnCXXForRangeStmt( getCurScope(), ForLoc, CoawaitLoc, FirstPart.get(), ForRangeInfo.LoopVar.get(), ForRangeInfo.ColonLoc, CorrectedRange.get(), T.getCloseLocation(), Sema::BFRK_Build, ForRangeInfo.LifetimeExtendTemps); } else if (ForEach) { // Similarly, we need to do the semantic analysis for a for-range // statement immediately in order to close over temporaries correctly. ForEachStmt = Actions.ObjC().ActOnObjCForCollectionStmt( ForLoc, FirstPart.get(), Collection.get(), T.getCloseLocation()); } else { // In OpenMP loop region loop control variable must be captured and be // private. Perform analysis of first part (if any). if (getLangOpts().OpenMP && FirstPart.isUsable()) { Actions.OpenMP().ActOnOpenMPLoopInitialization(ForLoc, FirstPart.get()); } } // C99 6.8.5p5 - In C99, the body of the for statement is a scope, even if // there is no compound stmt. C90 does not have this clause. We only do this // if the body isn't a compound statement to avoid push/pop in common cases. // // C++ 6.5p2: // The substatement in an iteration-statement implicitly defines a local scope // which is entered and exited each time through the loop. // // See comments in ParseIfStatement for why we create a scope for // for-init-statement/condition and a new scope for substatement in C++. // ParseScope InnerScope(this, Scope::DeclScope, C99orCXXorObjC, Tok.is(tok::l_brace)); // The body of the for loop has the same local mangling number as the // for-init-statement. // It will only be incremented if the body contains other things that would // normally increment the mangling number (like a compound statement). if (C99orCXXorObjC) getCurScope()->decrementMSManglingNumber(); MisleadingIndentationChecker MIChecker(*this, MSK_for, ForLoc); // Read the body statement. StmtResult Body(ParseStatement(TrailingElseLoc)); if (Body.isUsable()) MIChecker.Check(); // Pop the body scope if needed. InnerScope.Exit(); // Leave the for-scope. 
ForScope.Exit(); if (Body.isInvalid()) return StmtError(); if (ForEach) return Actions.ObjC().FinishObjCForCollectionStmt(ForEachStmt.get(), Body.get()); if (ForRangeInfo.ParsedForRangeDecl()) return Actions.FinishCXXForRangeStmt(ForRangeStmt.get(), Body.get()); return Actions.ActOnForStmt(ForLoc, T.getOpenLocation(), FirstPart.get(), SecondPart, ThirdPart, T.getCloseLocation(), Body.get()); } /// ParseGotoStatement /// jump-statement: /// 'goto' identifier ';' /// [GNU] 'goto' '*' expression ';' /// /// Note: this lets the caller parse the end ';'. /// StmtResult Parser::ParseGotoStatement() { assert(Tok.is(tok::kw_goto) && "Not a goto stmt!"); SourceLocation GotoLoc = ConsumeToken(); // eat the 'goto'. StmtResult Res; if (Tok.is(tok::identifier)) { LabelDecl *LD = Actions.LookupOrCreateLabel(Tok.getIdentifierInfo(), Tok.getLocation()); Res = Actions.ActOnGotoStmt(GotoLoc, Tok.getLocation(), LD); ConsumeToken(); } else if (Tok.is(tok::star)) { // GNU indirect goto extension. Diag(Tok, diag::ext_gnu_indirect_goto); SourceLocation StarLoc = ConsumeToken(); ExprResult R(ParseExpression()); if (R.isInvalid()) { // Skip to the semicolon, but don't consume it. SkipUntil(tok::semi, StopBeforeMatch); return StmtError(); } Res = Actions.ActOnIndirectGotoStmt(GotoLoc, StarLoc, R.get()); } else { Diag(Tok, diag::err_expected) << tok::identifier; return StmtError(); } return Res; } /// ParseContinueStatement /// jump-statement: /// 'continue' ';' /// /// Note: this lets the caller parse the end ';'. /// StmtResult Parser::ParseContinueStatement() { SourceLocation ContinueLoc = ConsumeToken(); // eat the 'continue'. return Actions.ActOnContinueStmt(ContinueLoc, getCurScope()); } /// ParseBreakStatement /// jump-statement: /// 'break' ';' /// /// Note: this lets the caller parse the end ';'. /// StmtResult Parser::ParseBreakStatement() { SourceLocation BreakLoc = ConsumeToken(); // eat the 'break'. return Actions.ActOnBreakStmt(BreakLoc, getCurScope()); } /// ParseReturnStatement /// jump-statement: /// 'return' expression[opt] ';' /// 'return' braced-init-list ';' /// 'co_return' expression[opt] ';' /// 'co_return' braced-init-list ';' StmtResult Parser::ParseReturnStatement() { assert((Tok.is(tok::kw_return) || Tok.is(tok::kw_co_return)) && "Not a return stmt!"); bool IsCoreturn = Tok.is(tok::kw_co_return); SourceLocation ReturnLoc = ConsumeToken(); // eat the 'return'. ExprResult R; if (Tok.isNot(tok::semi)) { if (!IsCoreturn) PreferredType.enterReturn(Actions, Tok.getLocation()); // FIXME: Code completion for co_return. if (Tok.is(tok::code_completion) && !IsCoreturn) { cutOffParsing(); Actions.CodeCompletion().CodeCompleteExpression( getCurScope(), PreferredType.get(Tok.getLocation())); return StmtError(); } if (Tok.is(tok::l_brace) && getLangOpts().CPlusPlus) { R = ParseInitializer(); if (R.isUsable()) Diag(R.get()->getBeginLoc(), getLangOpts().CPlusPlus11 ? diag::warn_cxx98_compat_generalized_initializer_lists : diag::ext_generalized_initializer_lists) << R.get()->getSourceRange(); } else R = ParseExpression(); if (R.isInvalid()) { SkipUntil(tok::r_brace, StopAtSemi | StopBeforeMatch); return StmtError(); } } if (IsCoreturn) return Actions.ActOnCoreturnStmt(getCurScope(), ReturnLoc, R.get()); return Actions.ActOnReturnStmt(ReturnLoc, R.get(), getCurScope()); } StmtResult Parser::ParsePragmaLoopHint(StmtVector &Stmts, ParsedStmtContext StmtCtx, SourceLocation *TrailingElseLoc, ParsedAttributes &Attrs) { // Create temporary attribute list. 
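// Illustrative sketch (editor's note, not part of this patch; names are
// hypothetical): jump-statement forms handled above, including a
// braced-init-list return (a C++11 generalized initializer) and the GNU
// indirect-goto extension:
//
//   #include <utility>
//   std::pair<int, int> make() { return {1, 2}; }   // braced-init-list return
//
//   int dispatch(int i) {                            // GNU labels-as-values
//     static void *Targets[] = {&&zero, &&other};
//     goto *Targets[i != 0];
//   zero:
//     return 0;
//   other:
//     return 1;
//   }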
ParsedAttributes TempAttrs(AttrFactory); SourceLocation StartLoc = Tok.getLocation(); // Get loop hints and consume annotated token. while (Tok.is(tok::annot_pragma_loop_hint)) { LoopHint Hint; if (!HandlePragmaLoopHint(Hint)) continue; ArgsUnion ArgHints[] = {Hint.PragmaNameLoc, Hint.OptionLoc, Hint.StateLoc, ArgsUnion(Hint.ValueExpr)}; TempAttrs.addNew(Hint.PragmaNameLoc->Ident, Hint.Range, nullptr, Hint.PragmaNameLoc->Loc, ArgHints, 4, ParsedAttr::Form::Pragma()); } // Get the next statement. MaybeParseCXX11Attributes(Attrs); ParsedAttributes EmptyDeclSpecAttrs(AttrFactory); StmtResult S = ParseStatementOrDeclarationAfterAttributes( Stmts, StmtCtx, TrailingElseLoc, Attrs, EmptyDeclSpecAttrs); Attrs.takeAllFrom(TempAttrs); // Start of attribute range may already be set for some invalid input. // See PR46336. if (Attrs.Range.getBegin().isInvalid()) Attrs.Range.setBegin(StartLoc); return S; } Decl *Parser::ParseFunctionStatementBody(Decl *Decl, ParseScope &BodyScope) { assert(Tok.is(tok::l_brace)); SourceLocation LBraceLoc = Tok.getLocation(); PrettyDeclStackTraceEntry CrashInfo(Actions.Context, Decl, LBraceLoc, "parsing function body"); // Save and reset current vtordisp stack if we have entered a C++ method body. bool IsCXXMethod = getLangOpts().CPlusPlus && Decl && isa(Decl); Sema::PragmaStackSentinelRAII PragmaStackSentinel(Actions, "InternalPragmaState", IsCXXMethod); // Do not enter a scope for the brace, as the arguments are in the same scope // (the function body) as the body itself. Instead, just read the statement // list and put it into a CompoundStmt for safe keeping. StmtResult FnBody(ParseCompoundStatementBody()); // If the function body could not be parsed, make a bogus compoundstmt. if (FnBody.isInvalid()) { Sema::CompoundScopeRAII CompoundScope(Actions); FnBody = Actions.ActOnCompoundStmt(LBraceLoc, LBraceLoc, std::nullopt, false); } BodyScope.Exit(); return Actions.ActOnFinishFunctionBody(Decl, FnBody.get()); } /// ParseFunctionTryBlock - Parse a C++ function-try-block. /// /// function-try-block: /// 'try' ctor-initializer[opt] compound-statement handler-seq /// Decl *Parser::ParseFunctionTryBlock(Decl *Decl, ParseScope &BodyScope) { assert(Tok.is(tok::kw_try) && "Expected 'try'"); SourceLocation TryLoc = ConsumeToken(); PrettyDeclStackTraceEntry CrashInfo(Actions.Context, Decl, TryLoc, "parsing function try block"); // Constructor initializer list? if (Tok.is(tok::colon)) ParseConstructorInitializer(Decl); else Actions.ActOnDefaultCtorInitializers(Decl); // Save and reset current vtordisp stack if we have entered a C++ method body. bool IsCXXMethod = getLangOpts().CPlusPlus && Decl && isa(Decl); Sema::PragmaStackSentinelRAII PragmaStackSentinel(Actions, "InternalPragmaState", IsCXXMethod); SourceLocation LBraceLoc = Tok.getLocation(); StmtResult FnBody(ParseCXXTryBlockCommon(TryLoc, /*FnTry*/true)); // If we failed to parse the try-catch, we just give the function an empty // compound statement as the body. if (FnBody.isInvalid()) { Sema::CompoundScopeRAII CompoundScope(Actions); FnBody = Actions.ActOnCompoundStmt(LBraceLoc, LBraceLoc, std::nullopt, false); } BodyScope.Exit(); return Actions.ActOnFinishFunctionBody(Decl, FnBody.get()); } bool Parser::trySkippingFunctionBody() { assert(SkipFunctionBodies && "Should only be called when SkipFunctionBodies is enabled"); if (!PP.isCodeCompletionEnabled()) { SkipFunctionBody(); return true; } // We're in code-completion mode. Skip parsing for all function bodies unless // the body contains the code-completion point. 
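// Illustrative sketch (editor's note, not part of this patch; names are
// hypothetical): the annot_pragma_loop_hint tokens consumed above come from
// loop pragmas written directly before the statement they annotate:
//
//   void scale(float *A, int N) {
//   #pragma clang loop vectorize(enable) unroll_count(4)
//     for (int I = 0; I < N; ++I)
//       A[I] *= 2.0f;
//   }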
TentativeParsingAction PA(*this); bool IsTryCatch = Tok.is(tok::kw_try); CachedTokens Toks; bool ErrorInPrologue = ConsumeAndStoreFunctionPrologue(Toks); if (llvm::any_of(Toks, [](const Token &Tok) { return Tok.is(tok::code_completion); })) { PA.Revert(); return false; } if (ErrorInPrologue) { PA.Commit(); SkipMalformedDecl(); return true; } if (!SkipUntil(tok::r_brace, StopAtCodeCompletion)) { PA.Revert(); return false; } while (IsTryCatch && Tok.is(tok::kw_catch)) { if (!SkipUntil(tok::l_brace, StopAtCodeCompletion) || !SkipUntil(tok::r_brace, StopAtCodeCompletion)) { PA.Revert(); return false; } } PA.Commit(); return true; } /// ParseCXXTryBlock - Parse a C++ try-block. /// /// try-block: /// 'try' compound-statement handler-seq /// StmtResult Parser::ParseCXXTryBlock() { assert(Tok.is(tok::kw_try) && "Expected 'try'"); SourceLocation TryLoc = ConsumeToken(); return ParseCXXTryBlockCommon(TryLoc); } /// ParseCXXTryBlockCommon - Parse the common part of try-block and /// function-try-block. /// /// try-block: /// 'try' compound-statement handler-seq /// /// function-try-block: /// 'try' ctor-initializer[opt] compound-statement handler-seq /// /// handler-seq: /// handler handler-seq[opt] /// /// [Borland] try-block: /// 'try' compound-statement seh-except-block /// 'try' compound-statement seh-finally-block /// StmtResult Parser::ParseCXXTryBlockCommon(SourceLocation TryLoc, bool FnTry) { if (Tok.isNot(tok::l_brace)) return StmtError(Diag(Tok, diag::err_expected) << tok::l_brace); StmtResult TryBlock(ParseCompoundStatement( /*isStmtExpr=*/false, Scope::DeclScope | Scope::TryScope | Scope::CompoundStmtScope | (FnTry ? Scope::FnTryCatchScope : 0))); if (TryBlock.isInvalid()) return TryBlock; // Borland allows SEH-handlers with 'try' if ((Tok.is(tok::identifier) && Tok.getIdentifierInfo() == getSEHExceptKeyword()) || Tok.is(tok::kw___finally)) { // TODO: Factor into common return ParseSEHHandlerCommon(...) StmtResult Handler; if(Tok.getIdentifierInfo() == getSEHExceptKeyword()) { SourceLocation Loc = ConsumeToken(); Handler = ParseSEHExceptBlock(Loc); } else { SourceLocation Loc = ConsumeToken(); Handler = ParseSEHFinallyBlock(Loc); } if(Handler.isInvalid()) return Handler; return Actions.ActOnSEHTryBlock(true /* IsCXXTry */, TryLoc, TryBlock.get(), Handler.get()); } else { StmtVector Handlers; // C++11 attributes can't appear here, despite this context seeming // statement-like. DiagnoseAndSkipCXX11Attributes(); if (Tok.isNot(tok::kw_catch)) return StmtError(Diag(Tok, diag::err_expected_catch)); while (Tok.is(tok::kw_catch)) { StmtResult Handler(ParseCXXCatchBlock(FnTry)); if (!Handler.isInvalid()) Handlers.push_back(Handler.get()); } // Don't bother creating the full statement if we don't have any usable // handlers. if (Handlers.empty()) return StmtError(); return Actions.ActOnCXXTryBlock(TryLoc, TryBlock.get(), Handlers); } } /// ParseCXXCatchBlock - Parse a C++ catch block, called handler in the standard /// /// handler: /// 'catch' '(' exception-declaration ')' compound-statement /// /// exception-declaration: /// attribute-specifier-seq[opt] type-specifier-seq declarator /// attribute-specifier-seq[opt] type-specifier-seq abstract-declarator[opt] /// '...' 
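// Illustrative sketch (editor's note, not part of this patch; names are
// hypothetical): a try-block with a handler-seq as parsed above; each catch
// parameter lives in its own catch scope, and the handler may also be '...':
//
//   #include <stdexcept>
//   int first_char(const char *S) {
//     try {
//       if (!S) throw std::invalid_argument("null");
//       return S[0];
//     } catch (const std::invalid_argument &) {
//       return -1;
//     } catch (...) {
//       return -2;
//     }
//   }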
/// StmtResult Parser::ParseCXXCatchBlock(bool FnCatch) { assert(Tok.is(tok::kw_catch) && "Expected 'catch'"); SourceLocation CatchLoc = ConsumeToken(); BalancedDelimiterTracker T(*this, tok::l_paren); if (T.expectAndConsume()) return StmtError(); // C++ 3.3.2p3: // The name in a catch exception-declaration is local to the handler and // shall not be redeclared in the outermost block of the handler. ParseScope CatchScope(this, Scope::DeclScope | Scope::ControlScope | Scope::CatchScope | (FnCatch ? Scope::FnTryCatchScope : 0)); // exception-declaration is equivalent to '...' or a parameter-declaration // without default arguments. Decl *ExceptionDecl = nullptr; if (Tok.isNot(tok::ellipsis)) { ParsedAttributes Attributes(AttrFactory); MaybeParseCXX11Attributes(Attributes); DeclSpec DS(AttrFactory); if (ParseCXXTypeSpecifierSeq(DS)) return StmtError(); Declarator ExDecl(DS, Attributes, DeclaratorContext::CXXCatch); ParseDeclarator(ExDecl); ExceptionDecl = Actions.ActOnExceptionDeclarator(getCurScope(), ExDecl); } else ConsumeToken(); T.consumeClose(); if (T.getCloseLocation().isInvalid()) return StmtError(); if (Tok.isNot(tok::l_brace)) return StmtError(Diag(Tok, diag::err_expected) << tok::l_brace); // FIXME: Possible draft standard bug: attribute-specifier should be allowed? StmtResult Block(ParseCompoundStatement()); if (Block.isInvalid()) return Block; return Actions.ActOnCXXCatchBlock(CatchLoc, ExceptionDecl, Block.get()); } void Parser::ParseMicrosoftIfExistsStatement(StmtVector &Stmts) { IfExistsCondition Result; if (ParseMicrosoftIfExistsCondition(Result)) return; // Handle dependent statements by parsing the braces as a compound statement. // This is not the same behavior as Visual C++, which don't treat this as a // compound statement, but for Clang's type checking we can't have anything // inside these braces escaping to the surrounding code. if (Result.Behavior == IEB_Dependent) { if (!Tok.is(tok::l_brace)) { Diag(Tok, diag::err_expected) << tok::l_brace; return; } StmtResult Compound = ParseCompoundStatement(); if (Compound.isInvalid()) return; StmtResult DepResult = Actions.ActOnMSDependentExistsStmt(Result.KeywordLoc, Result.IsIfExists, Result.SS, Result.Name, Compound.get()); if (DepResult.isUsable()) Stmts.push_back(DepResult.get()); return; } BalancedDelimiterTracker Braces(*this, tok::l_brace); if (Braces.consumeOpen()) { Diag(Tok, diag::err_expected) << tok::l_brace; return; } switch (Result.Behavior) { case IEB_Parse: // Parse the statements below. break; case IEB_Dependent: llvm_unreachable("Dependent case handled above"); case IEB_Skip: Braces.skipToEnd(); return; } // Condition is true, parse the statements. while (Tok.isNot(tok::r_brace)) { StmtResult R = ParseStatementOrDeclaration(Stmts, ParsedStmtContext::Compound); if (R.isUsable()) Stmts.push_back(R.get()); } Braces.consumeClose(); } diff --git a/contrib/llvm-project/clang/lib/Sema/SemaTemplateDeductionGuide.cpp b/contrib/llvm-project/clang/lib/Sema/SemaTemplateDeductionGuide.cpp index 0602d07c6b9b..1bf82b31def9 100644 --- a/contrib/llvm-project/clang/lib/Sema/SemaTemplateDeductionGuide.cpp +++ b/contrib/llvm-project/clang/lib/Sema/SemaTemplateDeductionGuide.cpp @@ -1,1435 +1,1450 @@ //===- SemaTemplateDeductionGude.cpp - Template Argument Deduction---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements deduction guides for C++ class template argument // deduction. // //===----------------------------------------------------------------------===// #include "TreeTransform.h" #include "TypeLocBuilder.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/OperationKinds.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/TemplateBase.h" #include "clang/AST/TemplateName.h" #include "clang/AST/Type.h" #include "clang/AST/TypeLoc.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" #include "clang/Basic/TypeTraits.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/Initialization.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/Overload.h" #include "clang/Sema/Ownership.h" #include "clang/Sema/Scope.h" #include "clang/Sema/Template.h" #include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include #include #include using namespace clang; using namespace sema; namespace { /// Tree transform to "extract" a transformed type from a class template's /// constructor to a deduction guide. class ExtractTypeForDeductionGuide : public TreeTransform { llvm::SmallVectorImpl &MaterializedTypedefs; ClassTemplateDecl *NestedPattern; const MultiLevelTemplateArgumentList *OuterInstantiationArgs; std::optional TypedefNameInstantiator; public: typedef TreeTransform Base; ExtractTypeForDeductionGuide( Sema &SemaRef, llvm::SmallVectorImpl &MaterializedTypedefs, - ClassTemplateDecl *NestedPattern, - const MultiLevelTemplateArgumentList *OuterInstantiationArgs) + ClassTemplateDecl *NestedPattern = nullptr, + const MultiLevelTemplateArgumentList *OuterInstantiationArgs = nullptr) : Base(SemaRef), MaterializedTypedefs(MaterializedTypedefs), NestedPattern(NestedPattern), OuterInstantiationArgs(OuterInstantiationArgs) { if (OuterInstantiationArgs) TypedefNameInstantiator.emplace( SemaRef, SemaRef.getASTContext().getTranslationUnitDecl(), *OuterInstantiationArgs); } TypeSourceInfo *transform(TypeSourceInfo *TSI) { return TransformType(TSI); } /// Returns true if it's safe to substitute \p Typedef with /// \p OuterInstantiationArgs. 
bool mightReferToOuterTemplateParameters(TypedefNameDecl *Typedef) { if (!NestedPattern) return false; static auto WalkUp = [](DeclContext *DC, DeclContext *TargetDC) { if (DC->Equals(TargetDC)) return true; while (DC->isRecord()) { if (DC->Equals(TargetDC)) return true; DC = DC->getParent(); } return false; }; if (WalkUp(Typedef->getDeclContext(), NestedPattern->getTemplatedDecl())) return true; if (WalkUp(NestedPattern->getTemplatedDecl(), Typedef->getDeclContext())) return true; return false; } QualType RebuildTemplateSpecializationType(TemplateName Template, SourceLocation TemplateNameLoc, TemplateArgumentListInfo &TemplateArgs) { if (!OuterInstantiationArgs || !isa_and_present(Template.getAsTemplateDecl())) return Base::RebuildTemplateSpecializationType(Template, TemplateNameLoc, TemplateArgs); auto *TATD = cast(Template.getAsTemplateDecl()); auto *Pattern = TATD; while (Pattern->getInstantiatedFromMemberTemplate()) Pattern = Pattern->getInstantiatedFromMemberTemplate(); if (!mightReferToOuterTemplateParameters(Pattern->getTemplatedDecl())) return Base::RebuildTemplateSpecializationType(Template, TemplateNameLoc, TemplateArgs); Decl *NewD = TypedefNameInstantiator->InstantiateTypeAliasTemplateDecl(TATD); if (!NewD) return QualType(); auto *NewTATD = cast(NewD); MaterializedTypedefs.push_back(NewTATD->getTemplatedDecl()); return Base::RebuildTemplateSpecializationType( TemplateName(NewTATD), TemplateNameLoc, TemplateArgs); } QualType TransformTypedefType(TypeLocBuilder &TLB, TypedefTypeLoc TL) { ASTContext &Context = SemaRef.getASTContext(); TypedefNameDecl *OrigDecl = TL.getTypedefNameDecl(); TypedefNameDecl *Decl = OrigDecl; // Transform the underlying type of the typedef and clone the Decl only if // the typedef has a dependent context. bool InDependentContext = OrigDecl->getDeclContext()->isDependentContext(); // A typedef/alias Decl within the NestedPattern may reference the outer // template parameters. They're substituted with corresponding instantiation // arguments here and in RebuildTemplateSpecializationType() above. // Otherwise, we would have a CTAD guide with "dangling" template // parameters. // For example, // template struct Outer { // using Alias = S; // template struct Inner { // Inner(Alias); // }; // }; if (OuterInstantiationArgs && InDependentContext && TL.getTypePtr()->isInstantiationDependentType()) { Decl = cast_if_present( TypedefNameInstantiator->InstantiateTypedefNameDecl( OrigDecl, /*IsTypeAlias=*/isa(OrigDecl))); if (!Decl) return QualType(); MaterializedTypedefs.push_back(Decl); } else if (InDependentContext) { TypeLocBuilder InnerTLB; QualType Transformed = TransformType(InnerTLB, OrigDecl->getTypeSourceInfo()->getTypeLoc()); TypeSourceInfo *TSI = InnerTLB.getTypeSourceInfo(Context, Transformed); if (isa(OrigDecl)) Decl = TypeAliasDecl::Create( Context, Context.getTranslationUnitDecl(), OrigDecl->getBeginLoc(), OrigDecl->getLocation(), OrigDecl->getIdentifier(), TSI); else { assert(isa(OrigDecl) && "Not a Type alias or typedef"); Decl = TypedefDecl::Create( Context, Context.getTranslationUnitDecl(), OrigDecl->getBeginLoc(), OrigDecl->getLocation(), OrigDecl->getIdentifier(), TSI); } MaterializedTypedefs.push_back(Decl); } QualType TDTy = Context.getTypedefType(Decl); TypedefTypeLoc TypedefTL = TLB.push(TDTy); TypedefTL.setNameLoc(TL.getNameLoc()); return TDTy; } }; // Build a deduction guide using the provided information. // // A deduction guide can be either a template or a non-template function // declaration. 
If \p TemplateParams is null, a non-template function // declaration will be created. NamedDecl *buildDeductionGuide( Sema &SemaRef, TemplateDecl *OriginalTemplate, TemplateParameterList *TemplateParams, CXXConstructorDecl *Ctor, ExplicitSpecifier ES, TypeSourceInfo *TInfo, SourceLocation LocStart, SourceLocation Loc, SourceLocation LocEnd, bool IsImplicit, llvm::ArrayRef MaterializedTypedefs = {}) { DeclContext *DC = OriginalTemplate->getDeclContext(); auto DeductionGuideName = SemaRef.Context.DeclarationNames.getCXXDeductionGuideName( OriginalTemplate); DeclarationNameInfo Name(DeductionGuideName, Loc); ArrayRef Params = TInfo->getTypeLoc().castAs().getParams(); // Build the implicit deduction guide template. auto *Guide = CXXDeductionGuideDecl::Create(SemaRef.Context, DC, LocStart, ES, Name, TInfo->getType(), TInfo, LocEnd, Ctor); Guide->setImplicit(IsImplicit); Guide->setParams(Params); for (auto *Param : Params) Param->setDeclContext(Guide); for (auto *TD : MaterializedTypedefs) TD->setDeclContext(Guide); if (isa(DC)) Guide->setAccess(AS_public); if (!TemplateParams) { DC->addDecl(Guide); return Guide; } auto *GuideTemplate = FunctionTemplateDecl::Create( SemaRef.Context, DC, Loc, DeductionGuideName, TemplateParams, Guide); GuideTemplate->setImplicit(IsImplicit); Guide->setDescribedFunctionTemplate(GuideTemplate); if (isa(DC)) GuideTemplate->setAccess(AS_public); DC->addDecl(GuideTemplate); return GuideTemplate; } // Transform a given template type parameter `TTP`. TemplateTypeParmDecl * transformTemplateTypeParam(Sema &SemaRef, DeclContext *DC, TemplateTypeParmDecl *TTP, MultiLevelTemplateArgumentList &Args, unsigned NewDepth, unsigned NewIndex) { // TemplateTypeParmDecl's index cannot be changed after creation, so // substitute it directly. auto *NewTTP = TemplateTypeParmDecl::Create( SemaRef.Context, DC, TTP->getBeginLoc(), TTP->getLocation(), NewDepth, NewIndex, TTP->getIdentifier(), TTP->wasDeclaredWithTypename(), TTP->isParameterPack(), TTP->hasTypeConstraint(), TTP->isExpandedParameterPack() ? std::optional(TTP->getNumExpansionParameters()) : std::nullopt); if (const auto *TC = TTP->getTypeConstraint()) SemaRef.SubstTypeConstraint(NewTTP, TC, Args, /*EvaluateConstraint=*/true); if (TTP->hasDefaultArgument()) { TemplateArgumentLoc InstantiatedDefaultArg; if (!SemaRef.SubstTemplateArgument( TTP->getDefaultArgument(), Args, InstantiatedDefaultArg, TTP->getDefaultArgumentLoc(), TTP->getDeclName())) NewTTP->setDefaultArgument(SemaRef.Context, InstantiatedDefaultArg); } SemaRef.CurrentInstantiationScope->InstantiatedLocal(TTP, NewTTP); return NewTTP; } // Similar to above, but for non-type template or template template parameters. template NonTypeTemplateOrTemplateTemplateParmDecl * transformTemplateParam(Sema &SemaRef, DeclContext *DC, NonTypeTemplateOrTemplateTemplateParmDecl *OldParam, MultiLevelTemplateArgumentList &Args, unsigned NewIndex, unsigned NewDepth) { // Ask the template instantiator to do the heavy lifting for us, then adjust // the index of the parameter once it's done. auto *NewParam = cast( SemaRef.SubstDecl(OldParam, DC, Args)); NewParam->setPosition(NewIndex); NewParam->setDepth(NewDepth); return NewParam; } /// Transform to convert portions of a constructor declaration into the /// corresponding deduction guide, per C++1z [over.match.class.deduct]p1. 
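// Illustrative sketch (editor's note, not part of this patch; names are
// hypothetical): the transform below turns each constructor of the class
// template into an implicit deduction guide, morally equivalent to writing:
//
//   template <typename T> struct Box {
//     Box(T Value, int Count) {}
//   };
//   // implied guide: template <typename T> Box(T, int) -> Box<T>;
//   Box B(2.5, 3);   // CTAD deduces Box<double>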
struct ConvertConstructorToDeductionGuideTransform { ConvertConstructorToDeductionGuideTransform(Sema &S, ClassTemplateDecl *Template) : SemaRef(S), Template(Template) { // If the template is nested, then we need to use the original // pattern to iterate over the constructors. ClassTemplateDecl *Pattern = Template; while (Pattern->getInstantiatedFromMemberTemplate()) { if (Pattern->isMemberSpecialization()) break; Pattern = Pattern->getInstantiatedFromMemberTemplate(); NestedPattern = Pattern; } if (NestedPattern) OuterInstantiationArgs = SemaRef.getTemplateInstantiationArgs(Template); } Sema &SemaRef; ClassTemplateDecl *Template; ClassTemplateDecl *NestedPattern = nullptr; DeclContext *DC = Template->getDeclContext(); CXXRecordDecl *Primary = Template->getTemplatedDecl(); DeclarationName DeductionGuideName = SemaRef.Context.DeclarationNames.getCXXDeductionGuideName(Template); QualType DeducedType = SemaRef.Context.getTypeDeclType(Primary); // Index adjustment to apply to convert depth-1 template parameters into // depth-0 template parameters. unsigned Depth1IndexAdjustment = Template->getTemplateParameters()->size(); // Instantiation arguments for the outermost depth-1 templates // when the template is nested MultiLevelTemplateArgumentList OuterInstantiationArgs; /// Transform a constructor declaration into a deduction guide. NamedDecl *transformConstructor(FunctionTemplateDecl *FTD, CXXConstructorDecl *CD) { SmallVector SubstArgs; LocalInstantiationScope Scope(SemaRef); // C++ [over.match.class.deduct]p1: // -- For each constructor of the class template designated by the // template-name, a function template with the following properties: // -- The template parameters are the template parameters of the class // template followed by the template parameters (including default // template arguments) of the constructor, if any. TemplateParameterList *TemplateParams = SemaRef.GetTemplateParameterList(Template); if (FTD) { TemplateParameterList *InnerParams = FTD->getTemplateParameters(); SmallVector AllParams; SmallVector Depth1Args; AllParams.reserve(TemplateParams->size() + InnerParams->size()); AllParams.insert(AllParams.begin(), TemplateParams->begin(), TemplateParams->end()); SubstArgs.reserve(InnerParams->size()); Depth1Args.reserve(InnerParams->size()); // Later template parameters could refer to earlier ones, so build up // a list of substituted template arguments as we go. for (NamedDecl *Param : *InnerParams) { MultiLevelTemplateArgumentList Args; Args.setKind(TemplateSubstitutionKind::Rewrite); Args.addOuterTemplateArguments(Depth1Args); Args.addOuterRetainedLevel(); if (NestedPattern) Args.addOuterRetainedLevels(NestedPattern->getTemplateDepth()); NamedDecl *NewParam = transformTemplateParameter(Param, Args); if (!NewParam) return nullptr; // Constraints require that we substitute depth-1 arguments // to match depths when substituted for evaluation later Depth1Args.push_back(SemaRef.Context.getInjectedTemplateArg(NewParam)); if (NestedPattern) { TemplateDeclInstantiator Instantiator(SemaRef, DC, OuterInstantiationArgs); Instantiator.setEvaluateConstraints(false); SemaRef.runWithSufficientStackSpace(NewParam->getLocation(), [&] { NewParam = cast(Instantiator.Visit(NewParam)); }); } assert(NewParam->getTemplateDepth() == 0 && "Unexpected template parameter depth"); AllParams.push_back(NewParam); SubstArgs.push_back(SemaRef.Context.getInjectedTemplateArg(NewParam)); } // Substitute new template parameters into requires-clause if present. 
Expr *RequiresClause = nullptr; if (Expr *InnerRC = InnerParams->getRequiresClause()) { MultiLevelTemplateArgumentList Args; Args.setKind(TemplateSubstitutionKind::Rewrite); Args.addOuterTemplateArguments(Depth1Args); Args.addOuterRetainedLevel(); if (NestedPattern) Args.addOuterRetainedLevels(NestedPattern->getTemplateDepth()); ExprResult E = SemaRef.SubstExpr(InnerRC, Args); if (E.isInvalid()) return nullptr; RequiresClause = E.getAs(); } TemplateParams = TemplateParameterList::Create( SemaRef.Context, InnerParams->getTemplateLoc(), InnerParams->getLAngleLoc(), AllParams, InnerParams->getRAngleLoc(), RequiresClause); } // If we built a new template-parameter-list, track that we need to // substitute references to the old parameters into references to the // new ones. MultiLevelTemplateArgumentList Args; Args.setKind(TemplateSubstitutionKind::Rewrite); if (FTD) { Args.addOuterTemplateArguments(SubstArgs); Args.addOuterRetainedLevel(); } FunctionProtoTypeLoc FPTL = CD->getTypeSourceInfo() ->getTypeLoc() .getAsAdjusted(); assert(FPTL && "no prototype for constructor declaration"); // Transform the type of the function, adjusting the return type and // replacing references to the old parameters with references to the // new ones. TypeLocBuilder TLB; SmallVector Params; SmallVector MaterializedTypedefs; QualType NewType = transformFunctionProtoType(TLB, FPTL, Params, Args, MaterializedTypedefs); if (NewType.isNull()) return nullptr; TypeSourceInfo *NewTInfo = TLB.getTypeSourceInfo(SemaRef.Context, NewType); return buildDeductionGuide( SemaRef, Template, TemplateParams, CD, CD->getExplicitSpecifier(), NewTInfo, CD->getBeginLoc(), CD->getLocation(), CD->getEndLoc(), /*IsImplicit=*/true, MaterializedTypedefs); } /// Build a deduction guide with the specified parameter types. NamedDecl *buildSimpleDeductionGuide(MutableArrayRef ParamTypes) { SourceLocation Loc = Template->getLocation(); // Build the requested type. FunctionProtoType::ExtProtoInfo EPI; EPI.HasTrailingReturn = true; QualType Result = SemaRef.BuildFunctionType(DeducedType, ParamTypes, Loc, DeductionGuideName, EPI); TypeSourceInfo *TSI = SemaRef.Context.getTrivialTypeSourceInfo(Result, Loc); if (NestedPattern) TSI = SemaRef.SubstType(TSI, OuterInstantiationArgs, Loc, DeductionGuideName); if (!TSI) return nullptr; FunctionProtoTypeLoc FPTL = TSI->getTypeLoc().castAs(); // Build the parameters, needed during deduction / substitution. SmallVector Params; for (auto T : ParamTypes) { auto *TSI = SemaRef.Context.getTrivialTypeSourceInfo(T, Loc); if (NestedPattern) TSI = SemaRef.SubstType(TSI, OuterInstantiationArgs, Loc, DeclarationName()); if (!TSI) return nullptr; ParmVarDecl *NewParam = ParmVarDecl::Create(SemaRef.Context, DC, Loc, Loc, nullptr, TSI->getType(), TSI, SC_None, nullptr); NewParam->setScopeInfo(0, Params.size()); FPTL.setParam(Params.size(), NewParam); Params.push_back(NewParam); } return buildDeductionGuide( SemaRef, Template, SemaRef.GetTemplateParameterList(Template), nullptr, ExplicitSpecifier(), TSI, Loc, Loc, Loc, /*IsImplicit=*/true); } private: /// Transform a constructor template parameter into a deduction guide template /// parameter, rebuilding any internal references to earlier parameters and /// renumbering as we go. 
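// Illustrative sketch (editor's note, not part of this patch; names are
// hypothetical): the renumbering below concatenates the class template's
// parameters with the constructor template's parameters, so depth-1
// parameters become depth 0 with their index shifted by
// Depth1IndexAdjustment:
//
//   template <typename T> struct Wrap {       // T: depth 0, index 0
//     template <typename U> Wrap(T, U) {}     // U: depth 1, index 0
//   };
//   // implied guide: template <typename T, typename U> Wrap(T, U) -> Wrap<T>;
//   // in the guide, T keeps index 0, U becomes index 1, and both are depth 0.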
NamedDecl *transformTemplateParameter(NamedDecl *TemplateParam, MultiLevelTemplateArgumentList &Args) { if (auto *TTP = dyn_cast(TemplateParam)) return transformTemplateTypeParam( SemaRef, DC, TTP, Args, TTP->getDepth() - 1, Depth1IndexAdjustment + TTP->getIndex()); if (auto *TTP = dyn_cast(TemplateParam)) return transformTemplateParam(SemaRef, DC, TTP, Args, Depth1IndexAdjustment + TTP->getIndex(), TTP->getDepth() - 1); auto *NTTP = cast(TemplateParam); return transformTemplateParam(SemaRef, DC, NTTP, Args, Depth1IndexAdjustment + NTTP->getIndex(), NTTP->getDepth() - 1); } QualType transformFunctionProtoType( TypeLocBuilder &TLB, FunctionProtoTypeLoc TL, SmallVectorImpl &Params, MultiLevelTemplateArgumentList &Args, SmallVectorImpl &MaterializedTypedefs) { SmallVector ParamTypes; const FunctionProtoType *T = TL.getTypePtr(); // -- The types of the function parameters are those of the constructor. for (auto *OldParam : TL.getParams()) { ParmVarDecl *NewParam = OldParam; // Given // template struct C { // template struct D { // template D(U, V); // }; // }; // First, transform all the references to template parameters that are // defined outside of the surrounding class template. That is T in the // above example. if (NestedPattern) { NewParam = transformFunctionTypeParam( NewParam, OuterInstantiationArgs, MaterializedTypedefs, /*TransformingOuterPatterns=*/true); if (!NewParam) return QualType(); } // Then, transform all the references to template parameters that are // defined at the class template and the constructor. In this example, // they're U and V, respectively. NewParam = transformFunctionTypeParam(NewParam, Args, MaterializedTypedefs, /*TransformingOuterPatterns=*/false); if (!NewParam) return QualType(); ParamTypes.push_back(NewParam->getType()); Params.push_back(NewParam); } // -- The return type is the class template specialization designated by // the template-name and template arguments corresponding to the // template parameters obtained from the class template. // // We use the injected-class-name type of the primary template instead. // This has the convenient property that it is different from any type that // the user can write in a deduction-guide (because they cannot enter the // context of the template), so implicit deduction guides can never collide // with explicit ones. QualType ReturnType = DeducedType; TLB.pushTypeSpec(ReturnType).setNameLoc(Primary->getLocation()); // Resolving a wording defect, we also inherit the variadicness of the // constructor. FunctionProtoType::ExtProtoInfo EPI; EPI.Variadic = T->isVariadic(); EPI.HasTrailingReturn = true; QualType Result = SemaRef.BuildFunctionType( ReturnType, ParamTypes, TL.getBeginLoc(), DeductionGuideName, EPI); if (Result.isNull()) return QualType(); FunctionProtoTypeLoc NewTL = TLB.push(Result); NewTL.setLocalRangeBegin(TL.getLocalRangeBegin()); NewTL.setLParenLoc(TL.getLParenLoc()); NewTL.setRParenLoc(TL.getRParenLoc()); NewTL.setExceptionSpecRange(SourceRange()); NewTL.setLocalRangeEnd(TL.getLocalRangeEnd()); for (unsigned I = 0, E = NewTL.getNumParams(); I != E; ++I) NewTL.setParam(I, Params[I]); return Result; } ParmVarDecl *transformFunctionTypeParam( ParmVarDecl *OldParam, MultiLevelTemplateArgumentList &Args, llvm::SmallVectorImpl &MaterializedTypedefs, bool TransformingOuterPatterns) { TypeSourceInfo *OldDI = OldParam->getTypeSourceInfo(); TypeSourceInfo *NewDI; if (auto PackTL = OldDI->getTypeLoc().getAs()) { // Expand out the one and only element in each inner pack. 
Sema::ArgumentPackSubstitutionIndexRAII SubstIndex(SemaRef, 0); NewDI = SemaRef.SubstType(PackTL.getPatternLoc(), Args, OldParam->getLocation(), OldParam->getDeclName()); if (!NewDI) return nullptr; NewDI = SemaRef.CheckPackExpansion(NewDI, PackTL.getEllipsisLoc(), PackTL.getTypePtr()->getNumExpansions()); } else NewDI = SemaRef.SubstType(OldDI, Args, OldParam->getLocation(), OldParam->getDeclName()); if (!NewDI) return nullptr; // Extract the type. This (for instance) replaces references to typedef // members of the current instantiations with the definitions of those // typedefs, avoiding triggering instantiation of the deduced type during // deduction. NewDI = ExtractTypeForDeductionGuide( SemaRef, MaterializedTypedefs, NestedPattern, TransformingOuterPatterns ? &Args : nullptr) .transform(NewDI); // Resolving a wording defect, we also inherit default arguments from the // constructor. ExprResult NewDefArg; if (OldParam->hasDefaultArg()) { // We don't care what the value is (we won't use it); just create a // placeholder to indicate there is a default argument. QualType ParamTy = NewDI->getType(); NewDefArg = new (SemaRef.Context) OpaqueValueExpr(OldParam->getDefaultArgRange().getBegin(), ParamTy.getNonLValueExprType(SemaRef.Context), ParamTy->isLValueReferenceType() ? VK_LValue : ParamTy->isRValueReferenceType() ? VK_XValue : VK_PRValue); } // Handle arrays and functions decay. auto NewType = NewDI->getType(); if (NewType->isArrayType() || NewType->isFunctionType()) NewType = SemaRef.Context.getDecayedType(NewType); ParmVarDecl *NewParam = ParmVarDecl::Create( SemaRef.Context, DC, OldParam->getInnerLocStart(), OldParam->getLocation(), OldParam->getIdentifier(), NewType, NewDI, OldParam->getStorageClass(), NewDefArg.get()); NewParam->setScopeInfo(OldParam->getFunctionScopeDepth(), OldParam->getFunctionScopeIndex()); SemaRef.CurrentInstantiationScope->InstantiatedLocal(OldParam, NewParam); return NewParam; } }; unsigned getTemplateParameterDepth(NamedDecl *TemplateParam) { if (auto *TTP = dyn_cast(TemplateParam)) return TTP->getDepth(); if (auto *TTP = dyn_cast(TemplateParam)) return TTP->getDepth(); if (auto *NTTP = dyn_cast(TemplateParam)) return NTTP->getDepth(); llvm_unreachable("Unhandled template parameter types"); } unsigned getTemplateParameterIndex(NamedDecl *TemplateParam) { if (auto *TTP = dyn_cast(TemplateParam)) return TTP->getIndex(); if (auto *TTP = dyn_cast(TemplateParam)) return TTP->getIndex(); if (auto *NTTP = dyn_cast(TemplateParam)) return NTTP->getIndex(); llvm_unreachable("Unhandled template parameter types"); } // Find all template parameters that appear in the given DeducedArgs. // Return the indices of the template parameters in the TemplateParams. SmallVector TemplateParamsReferencedInTemplateArgumentList( const TemplateParameterList *TemplateParamsList, ArrayRef DeducedArgs) { struct TemplateParamsReferencedFinder : public RecursiveASTVisitor { const TemplateParameterList *TemplateParamList; llvm::BitVector ReferencedTemplateParams; TemplateParamsReferencedFinder( const TemplateParameterList *TemplateParamList) : TemplateParamList(TemplateParamList), ReferencedTemplateParams(TemplateParamList->size()) {} bool VisitTemplateTypeParmType(TemplateTypeParmType *TTP) { // We use the index and depth to retrieve the corresponding template // parameter from the parameter list, which is more robost. 
Mark(TTP->getDepth(), TTP->getIndex()); return true; } bool VisitDeclRefExpr(DeclRefExpr *DRE) { MarkAppeared(DRE->getFoundDecl()); return true; } bool TraverseTemplateName(TemplateName Template) { if (auto *TD = Template.getAsTemplateDecl()) MarkAppeared(TD); return RecursiveASTVisitor::TraverseTemplateName(Template); } void MarkAppeared(NamedDecl *ND) { if (llvm::isa(ND)) Mark(getTemplateParameterDepth(ND), getTemplateParameterIndex(ND)); } void Mark(unsigned Depth, unsigned Index) { if (Index < TemplateParamList->size() && TemplateParamList->getParam(Index)->getTemplateDepth() == Depth) ReferencedTemplateParams.set(Index); } }; TemplateParamsReferencedFinder Finder(TemplateParamsList); Finder.TraverseTemplateArguments(DeducedArgs); SmallVector Results; for (unsigned Index = 0; Index < TemplateParamsList->size(); ++Index) { if (Finder.ReferencedTemplateParams[Index]) Results.push_back(Index); } return Results; } bool hasDeclaredDeductionGuides(DeclarationName Name, DeclContext *DC) { // Check whether we've already declared deduction guides for this template. // FIXME: Consider storing a flag on the template to indicate this. assert(Name.getNameKind() == DeclarationName::NameKind::CXXDeductionGuideName && "name must be a deduction guide name"); auto Existing = DC->lookup(Name); for (auto *D : Existing) if (D->isImplicit()) return true; return false; } NamedDecl *transformTemplateParameter(Sema &SemaRef, DeclContext *DC, NamedDecl *TemplateParam, MultiLevelTemplateArgumentList &Args, unsigned NewIndex, unsigned NewDepth) { if (auto *TTP = dyn_cast(TemplateParam)) return transformTemplateTypeParam(SemaRef, DC, TTP, Args, NewDepth, NewIndex); if (auto *TTP = dyn_cast(TemplateParam)) return transformTemplateParam(SemaRef, DC, TTP, Args, NewIndex, NewDepth); if (auto *NTTP = dyn_cast(TemplateParam)) return transformTemplateParam(SemaRef, DC, NTTP, Args, NewIndex, NewDepth); llvm_unreachable("Unhandled template parameter types"); } // Build the associated constraints for the alias deduction guides. // C++ [over.match.class.deduct]p3.3: // The associated constraints ([temp.constr.decl]) are the conjunction of the // associated constraints of g and a constraint that is satisfied if and only // if the arguments of A are deducible (see below) from the return type. // // The return result is expected to be the require-clause for the synthesized // alias deduction guide. Expr * buildAssociatedConstraints(Sema &SemaRef, FunctionTemplateDecl *F, TypeAliasTemplateDecl *AliasTemplate, ArrayRef DeduceResults, unsigned FirstUndeducedParamIdx, Expr *IsDeducible) { Expr *RC = F->getTemplateParameters()->getRequiresClause(); if (!RC) return IsDeducible; ASTContext &Context = SemaRef.Context; LocalInstantiationScope Scope(SemaRef); // In the clang AST, constraint nodes are deliberately not instantiated unless // they are actively being evaluated. Consequently, occurrences of template // parameters in the require-clause expression have a subtle "depth" // difference compared to normal occurrences in places, such as function // parameters. When transforming the require-clause, we must take this // distinction into account: // // 1) In the transformed require-clause, occurrences of template parameters // must use the "uninstantiated" depth; // 2) When substituting on the require-clause expr of the underlying // deduction guide, we must use the entire set of template argument lists; // // It's important to note that we're performing this transformation on an // *instantiated* AliasTemplate. 
// For 1), if the alias template is nested within a class template, we // calcualte the 'uninstantiated' depth by adding the substitution level back. unsigned AdjustDepth = 0; if (auto *PrimaryTemplate = AliasTemplate->getInstantiatedFromMemberTemplate()) AdjustDepth = PrimaryTemplate->getTemplateDepth(); // We rebuild all template parameters with the uninstantiated depth, and // build template arguments refer to them. SmallVector AdjustedAliasTemplateArgs; for (auto *TP : *AliasTemplate->getTemplateParameters()) { // Rebuild any internal references to earlier parameters and reindex // as we go. MultiLevelTemplateArgumentList Args; Args.setKind(TemplateSubstitutionKind::Rewrite); Args.addOuterTemplateArguments(AdjustedAliasTemplateArgs); NamedDecl *NewParam = transformTemplateParameter( SemaRef, AliasTemplate->getDeclContext(), TP, Args, /*NewIndex=*/AdjustedAliasTemplateArgs.size(), getTemplateParameterDepth(TP) + AdjustDepth); TemplateArgument NewTemplateArgument = Context.getInjectedTemplateArg(NewParam); AdjustedAliasTemplateArgs.push_back(NewTemplateArgument); } // Template arguments used to transform the template arguments in // DeducedResults. SmallVector TemplateArgsForBuildingRC( F->getTemplateParameters()->size()); // Transform the transformed template args MultiLevelTemplateArgumentList Args; Args.setKind(TemplateSubstitutionKind::Rewrite); Args.addOuterTemplateArguments(AdjustedAliasTemplateArgs); for (unsigned Index = 0; Index < DeduceResults.size(); ++Index) { const auto &D = DeduceResults[Index]; if (D.isNull()) { // non-deduced template parameters of f NamedDecl *TP = F->getTemplateParameters()->getParam(Index); MultiLevelTemplateArgumentList Args; Args.setKind(TemplateSubstitutionKind::Rewrite); Args.addOuterTemplateArguments(TemplateArgsForBuildingRC); // Rebuild the template parameter with updated depth and index. NamedDecl *NewParam = transformTemplateParameter( SemaRef, F->getDeclContext(), TP, Args, /*NewIndex=*/FirstUndeducedParamIdx, getTemplateParameterDepth(TP) + AdjustDepth); FirstUndeducedParamIdx += 1; assert(TemplateArgsForBuildingRC[Index].isNull()); TemplateArgsForBuildingRC[Index] = Context.getInjectedTemplateArg(NewParam); continue; } TemplateArgumentLoc Input = SemaRef.getTrivialTemplateArgumentLoc(D, QualType(), SourceLocation{}); TemplateArgumentLoc Output; if (!SemaRef.SubstTemplateArgument(Input, Args, Output)) { assert(TemplateArgsForBuildingRC[Index].isNull() && "InstantiatedArgs must be null before setting"); TemplateArgsForBuildingRC[Index] = Output.getArgument(); } } // A list of template arguments for transforming the require-clause of F. // It must contain the entire set of template argument lists. MultiLevelTemplateArgumentList ArgsForBuildingRC; ArgsForBuildingRC.setKind(clang::TemplateSubstitutionKind::Rewrite); ArgsForBuildingRC.addOuterTemplateArguments(TemplateArgsForBuildingRC); // For 2), if the underlying deduction guide F is nested in a class template, // we need the entire template argument list, as the constraint AST in the // require-clause of F remains completely uninstantiated. 
// // For example: // template // depth 0 // struct Outer { // template // struct Foo { Foo(U); }; // // template // depth 1 // requires C // Foo(U) -> Foo; // }; // template // using AFoo = Outer::Foo; // // In this scenario, the deduction guide for `Foo` inside `Outer`: // - The occurrence of U in the require-expression is [depth:1, index:0] // - The occurrence of U in the function parameter is [depth:0, index:0] // - The template parameter of U is [depth:0, index:0] // // We add the outer template arguments which is [int] to the multi-level arg // list to ensure that the occurrence U in `C` will be replaced with int // during the substitution. // // NOTE: The underlying deduction guide F is instantiated -- either from an // explicitly-written deduction guide member, or from a constructor. // getInstantiatedFromMemberTemplate() can only handle the former case, so we // check the DeclContext kind. if (F->getLexicalDeclContext()->getDeclKind() == clang::Decl::ClassTemplateSpecialization) { auto OuterLevelArgs = SemaRef.getTemplateInstantiationArgs( F, F->getLexicalDeclContext(), /*Final=*/false, /*Innermost=*/std::nullopt, /*RelativeToPrimary=*/true, /*Pattern=*/nullptr, /*ForConstraintInstantiation=*/true); for (auto It : OuterLevelArgs) ArgsForBuildingRC.addOuterTemplateArguments(It.Args); } ExprResult E = SemaRef.SubstExpr(RC, ArgsForBuildingRC); if (E.isInvalid()) return nullptr; auto Conjunction = SemaRef.BuildBinOp(SemaRef.getCurScope(), SourceLocation{}, BinaryOperatorKind::BO_LAnd, E.get(), IsDeducible); if (Conjunction.isInvalid()) return nullptr; return Conjunction.getAs(); } // Build the is_deducible constraint for the alias deduction guides. // [over.match.class.deduct]p3.3: // ... and a constraint that is satisfied if and only if the arguments // of A are deducible (see below) from the return type. Expr *buildIsDeducibleConstraint(Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate, QualType ReturnType, SmallVector TemplateParams) { ASTContext &Context = SemaRef.Context; // Constraint AST nodes must use uninstantiated depth. if (auto *PrimaryTemplate = AliasTemplate->getInstantiatedFromMemberTemplate(); PrimaryTemplate && TemplateParams.size() > 0) { LocalInstantiationScope Scope(SemaRef); // Adjust the depth for TemplateParams. unsigned AdjustDepth = PrimaryTemplate->getTemplateDepth(); SmallVector TransformedTemplateArgs; for (auto *TP : TemplateParams) { // Rebuild any internal references to earlier parameters and reindex // as we go. MultiLevelTemplateArgumentList Args; Args.setKind(TemplateSubstitutionKind::Rewrite); Args.addOuterTemplateArguments(TransformedTemplateArgs); NamedDecl *NewParam = transformTemplateParameter( SemaRef, AliasTemplate->getDeclContext(), TP, Args, /*NewIndex=*/TransformedTemplateArgs.size(), getTemplateParameterDepth(TP) + AdjustDepth); TemplateArgument NewTemplateArgument = Context.getInjectedTemplateArg(NewParam); TransformedTemplateArgs.push_back(NewTemplateArgument); } // Transformed the ReturnType to restore the uninstantiated depth. 
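// --- Illustrative aside: a user-level sketch of the is-deducible constraint
// built here. The explicit guide below would return S<char>, which cannot be
// written as PtrS<U>, so that guide is constrained out when deducing through
// the alias; only the pointer form remains viable. `S` and `PtrS` are
// hypothetical names.
#include <type_traits>
template <typename T> struct S { S(T) {} };
S(int) -> S<char>; // a guide whose result is not expressible via the alias
template <typename U> using PtrS = S<U *>;
inline void demoIsDeducibleConstraint() {
  PtrS p(static_cast<long *>(nullptr)); // OK: deduces S<long *>
  static_assert(std::is_same_v<decltype(p), S<long *>>);
  // PtrS q(42);  // ill-formed: the S(int) -> S<char> guide fails the
  //              // is-deducible check, and no other guide accepts an int.
}
// ---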
MultiLevelTemplateArgumentList Args; Args.setKind(TemplateSubstitutionKind::Rewrite); Args.addOuterTemplateArguments(TransformedTemplateArgs); ReturnType = SemaRef.SubstType( ReturnType, Args, AliasTemplate->getLocation(), Context.DeclarationNames.getCXXDeductionGuideName(AliasTemplate)); }; SmallVector IsDeducibleTypeTraitArgs = { Context.getTrivialTypeSourceInfo( Context.getDeducedTemplateSpecializationType( TemplateName(AliasTemplate), /*DeducedType=*/QualType(), /*IsDependent=*/true)), // template specialization type whose // arguments will be deduced. Context.getTrivialTypeSourceInfo( ReturnType), // type from which template arguments are deduced. }; return TypeTraitExpr::Create( Context, Context.getLogicalOperationType(), AliasTemplate->getLocation(), TypeTrait::BTT_IsDeducible, IsDeducibleTypeTraitArgs, AliasTemplate->getLocation(), /*Value*/ false); } std::pair> getRHSTemplateDeclAndArgs(Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate) { // Unwrap the sugared ElaboratedType. auto RhsType = AliasTemplate->getTemplatedDecl() ->getUnderlyingType() .getSingleStepDesugaredType(SemaRef.Context); TemplateDecl *Template = nullptr; llvm::ArrayRef AliasRhsTemplateArgs; if (const auto *TST = RhsType->getAs()) { // Cases where the RHS of the alias is dependent. e.g. // template // using AliasFoo1 = Foo; // a class/type alias template specialization Template = TST->getTemplateName().getAsTemplateDecl(); AliasRhsTemplateArgs = TST->template_arguments(); } else if (const auto *RT = RhsType->getAs()) { // Cases where template arguments in the RHS of the alias are not // dependent. e.g. // using AliasFoo = Foo; if (const auto *CTSD = llvm::dyn_cast( RT->getAsCXXRecordDecl())) { Template = CTSD->getSpecializedTemplate(); AliasRhsTemplateArgs = CTSD->getTemplateArgs().asArray(); } } else { assert(false && "unhandled RHS type of the alias"); } return {Template, AliasRhsTemplateArgs}; } // Build deduction guides for a type alias template from the given underlying // deduction guide F. FunctionTemplateDecl * BuildDeductionGuideForTypeAlias(Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate, FunctionTemplateDecl *F, SourceLocation Loc) { LocalInstantiationScope Scope(SemaRef); Sema::InstantiatingTemplate BuildingDeductionGuides( SemaRef, AliasTemplate->getLocation(), F, Sema::InstantiatingTemplate::BuildingDeductionGuidesTag{}); if (BuildingDeductionGuides.isInvalid()) return nullptr; auto &Context = SemaRef.Context; auto [Template, AliasRhsTemplateArgs] = getRHSTemplateDeclAndArgs(SemaRef, AliasTemplate); auto RType = F->getTemplatedDecl()->getReturnType(); // The (trailing) return type of the deduction guide. const TemplateSpecializationType *FReturnType = RType->getAs(); if (const auto *InjectedCNT = RType->getAs()) // implicitly-generated deduction guide. FReturnType = InjectedCNT->getInjectedTST(); else if (const auto *ET = RType->getAs()) // explicit deduction guide. FReturnType = ET->getNamedType()->getAs(); assert(FReturnType && "expected to see a return type"); // Deduce template arguments of the deduction guide f from the RHS of // the alias. // // C++ [over.match.class.deduct]p3: ...For each function or function // template f in the guides of the template named by the // simple-template-id of the defining-type-id, the template arguments // of the return type of f are deduced from the defining-type-id of A // according to the process in [temp.deduct.type] with the exception // that deduction does not fail if not all template arguments are // deduced. 
// // // template // f(X, Y) -> f; // // template // using alias = f; // // The RHS of alias is f, we deduced the template arguments of // the return type of the deduction guide from it: Y->int, X->U sema::TemplateDeductionInfo TDeduceInfo(Loc); // Must initialize n elements, this is required by DeduceTemplateArguments. SmallVector DeduceResults( F->getTemplateParameters()->size()); // FIXME: DeduceTemplateArguments stops immediately at the first // non-deducible template argument. However, this doesn't seem to casue // issues for practice cases, we probably need to extend it to continue // performing deduction for rest of arguments to align with the C++ // standard. SemaRef.DeduceTemplateArguments( F->getTemplateParameters(), FReturnType->template_arguments(), AliasRhsTemplateArgs, TDeduceInfo, DeduceResults, /*NumberOfArgumentsMustMatch=*/false); SmallVector DeducedArgs; SmallVector NonDeducedTemplateParamsInFIndex; // !!NOTE: DeduceResults respects the sequence of template parameters of // the deduction guide f. for (unsigned Index = 0; Index < DeduceResults.size(); ++Index) { if (const auto &D = DeduceResults[Index]; !D.isNull()) // Deduced DeducedArgs.push_back(D); else NonDeducedTemplateParamsInFIndex.push_back(Index); } auto DeducedAliasTemplateParams = TemplateParamsReferencedInTemplateArgumentList( AliasTemplate->getTemplateParameters(), DeducedArgs); // All template arguments null by default. SmallVector TemplateArgsForBuildingFPrime( F->getTemplateParameters()->size()); // Create a template parameter list for the synthesized deduction guide f'. // // C++ [over.match.class.deduct]p3.2: // If f is a function template, f' is a function template whose template // parameter list consists of all the template parameters of A // (including their default template arguments) that appear in the above // deductions or (recursively) in their default template arguments SmallVector FPrimeTemplateParams; // Store template arguments that refer to the newly-created template // parameters, used for building `TemplateArgsForBuildingFPrime`. SmallVector TransformedDeducedAliasArgs( AliasTemplate->getTemplateParameters()->size()); for (unsigned AliasTemplateParamIdx : DeducedAliasTemplateParams) { auto *TP = AliasTemplate->getTemplateParameters()->getParam(AliasTemplateParamIdx); // Rebuild any internal references to earlier parameters and reindex as // we go. MultiLevelTemplateArgumentList Args; Args.setKind(TemplateSubstitutionKind::Rewrite); Args.addOuterTemplateArguments(TransformedDeducedAliasArgs); NamedDecl *NewParam = transformTemplateParameter( SemaRef, AliasTemplate->getDeclContext(), TP, Args, /*NewIndex=*/FPrimeTemplateParams.size(), getTemplateParameterDepth(TP)); FPrimeTemplateParams.push_back(NewParam); TemplateArgument NewTemplateArgument = Context.getInjectedTemplateArg(NewParam); TransformedDeducedAliasArgs[AliasTemplateParamIdx] = NewTemplateArgument; } unsigned FirstUndeducedParamIdx = FPrimeTemplateParams.size(); // ...followed by the template parameters of f that were not deduced // (including their default template arguments) for (unsigned FTemplateParamIdx : NonDeducedTemplateParamsInFIndex) { auto *TP = F->getTemplateParameters()->getParam(FTemplateParamIdx); MultiLevelTemplateArgumentList Args; Args.setKind(TemplateSubstitutionKind::Rewrite); // We take a shortcut here, it is ok to reuse the // TemplateArgsForBuildingFPrime. 
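// --- Illustrative aside: the user-level shape of the parameter-list rule
// quoted above. For the alias below, f' starts with the alias parameter that
// was deduced (U), followed by the underlying guide's parameter that was not
// deduced (V). `Pair` and `UPair` are hypothetical names.
#include <type_traits>
template <typename X, typename Y> struct Pair {
  template <typename V> Pair(X, Y, V) {}
};
template <typename U> using UPair = Pair<U, U>;
// Conceptual synthesized guide:
//   template <typename U, typename V> UPair(U, U, V) -> Pair<U, U>;
inline void demoSynthesizedParameterList() {
  UPair p(1, 2, 3.5); // U = int (deduced through the alias), V = double (kept from f)
  static_assert(std::is_same_v<decltype(p), Pair<int, int>>);
}
// ---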
Args.addOuterTemplateArguments(TemplateArgsForBuildingFPrime); NamedDecl *NewParam = transformTemplateParameter( SemaRef, F->getDeclContext(), TP, Args, FPrimeTemplateParams.size(), getTemplateParameterDepth(TP)); FPrimeTemplateParams.push_back(NewParam); assert(TemplateArgsForBuildingFPrime[FTemplateParamIdx].isNull() && "The argument must be null before setting"); TemplateArgsForBuildingFPrime[FTemplateParamIdx] = Context.getInjectedTemplateArg(NewParam); } // To form a deduction guide f' from f, we leverage clang's instantiation // mechanism, we construct a template argument list where the template // arguments refer to the newly-created template parameters of f', and // then apply instantiation on this template argument list to instantiate // f, this ensures all template parameter occurrences are updated // correctly. // // The template argument list is formed from the `DeducedArgs`, two parts: // 1) appeared template parameters of alias: transfrom the deduced // template argument; // 2) non-deduced template parameters of f: rebuild a // template argument; // // 2) has been built already (when rebuilding the new template // parameters), we now perform 1). MultiLevelTemplateArgumentList Args; Args.setKind(TemplateSubstitutionKind::Rewrite); Args.addOuterTemplateArguments(TransformedDeducedAliasArgs); for (unsigned Index = 0; Index < DeduceResults.size(); ++Index) { const auto &D = DeduceResults[Index]; if (D.isNull()) { // 2): Non-deduced template parameter has been built already. assert(!TemplateArgsForBuildingFPrime[Index].isNull() && "template arguments for non-deduced template parameters should " "be been set!"); continue; } TemplateArgumentLoc Input = SemaRef.getTrivialTemplateArgumentLoc(D, QualType(), SourceLocation{}); TemplateArgumentLoc Output; if (!SemaRef.SubstTemplateArgument(Input, Args, Output)) { assert(TemplateArgsForBuildingFPrime[Index].isNull() && "InstantiatedArgs must be null before setting"); TemplateArgsForBuildingFPrime[Index] = Output.getArgument(); } } auto *TemplateArgListForBuildingFPrime = TemplateArgumentList::CreateCopy(Context, TemplateArgsForBuildingFPrime); // Form the f' by substituting the template arguments into f. 
if (auto *FPrime = SemaRef.InstantiateFunctionDeclaration( F, TemplateArgListForBuildingFPrime, AliasTemplate->getLocation(), Sema::CodeSynthesisContext::BuildingDeductionGuides)) { auto *GG = cast(FPrime); Expr *IsDeducible = buildIsDeducibleConstraint( SemaRef, AliasTemplate, FPrime->getReturnType(), FPrimeTemplateParams); Expr *RequiresClause = buildAssociatedConstraints(SemaRef, F, AliasTemplate, DeduceResults, FirstUndeducedParamIdx, IsDeducible); auto *FPrimeTemplateParamList = TemplateParameterList::Create( Context, AliasTemplate->getTemplateParameters()->getTemplateLoc(), AliasTemplate->getTemplateParameters()->getLAngleLoc(), FPrimeTemplateParams, AliasTemplate->getTemplateParameters()->getRAngleLoc(), /*RequiresClause=*/RequiresClause); auto *Result = cast(buildDeductionGuide( SemaRef, AliasTemplate, FPrimeTemplateParamList, GG->getCorrespondingConstructor(), GG->getExplicitSpecifier(), GG->getTypeSourceInfo(), AliasTemplate->getBeginLoc(), AliasTemplate->getLocation(), AliasTemplate->getEndLoc(), F->isImplicit())); cast(Result->getTemplatedDecl()) ->setDeductionCandidateKind(GG->getDeductionCandidateKind()); return Result; } return nullptr; } void DeclareImplicitDeductionGuidesForTypeAlias( Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate, SourceLocation Loc) { if (AliasTemplate->isInvalidDecl()) return; auto &Context = SemaRef.Context; // FIXME: if there is an explicit deduction guide after the first use of the // type alias usage, we will not cover this explicit deduction guide. fix this // case. if (hasDeclaredDeductionGuides( Context.DeclarationNames.getCXXDeductionGuideName(AliasTemplate), AliasTemplate->getDeclContext())) return; auto [Template, AliasRhsTemplateArgs] = getRHSTemplateDeclAndArgs(SemaRef, AliasTemplate); if (!Template) return; DeclarationNameInfo NameInfo( Context.DeclarationNames.getCXXDeductionGuideName(Template), Loc); LookupResult Guides(SemaRef, NameInfo, clang::Sema::LookupOrdinaryName); SemaRef.LookupQualifiedName(Guides, Template->getDeclContext()); Guides.suppressDiagnostics(); for (auto *G : Guides) { if (auto *DG = dyn_cast(G)) { // The deduction guide is a non-template function decl, we just clone it. auto *FunctionType = SemaRef.Context.getTrivialTypeSourceInfo(DG->getType()); FunctionProtoTypeLoc FPTL = FunctionType->getTypeLoc().castAs(); // Clone the parameters. for (unsigned I = 0, N = DG->getNumParams(); I != N; ++I) { const auto *P = DG->getParamDecl(I); auto *TSI = SemaRef.Context.getTrivialTypeSourceInfo(P->getType()); ParmVarDecl *NewParam = ParmVarDecl::Create( SemaRef.Context, G->getDeclContext(), DG->getParamDecl(I)->getBeginLoc(), P->getLocation(), nullptr, TSI->getType(), TSI, SC_None, nullptr); NewParam->setScopeInfo(0, I); FPTL.setParam(I, NewParam); } auto *Transformed = cast(buildDeductionGuide( SemaRef, AliasTemplate, /*TemplateParams=*/nullptr, /*Constructor=*/nullptr, DG->getExplicitSpecifier(), FunctionType, AliasTemplate->getBeginLoc(), AliasTemplate->getLocation(), AliasTemplate->getEndLoc(), DG->isImplicit())); // FIXME: Here the synthesized deduction guide is not a templated // function. Per [dcl.decl]p4, the requires-clause shall be present only // if the declarator declares a templated function, a bug in standard? 
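// --- Illustrative aside: the case handled by the non-template branch above --
// the underlying template has a plain (non-template) explicit guide, which is
// cloned for the alias and further constrained by the is-deducible check.
// `Str` and `AStr` are hypothetical names.
#include <type_traits>
template <typename T> struct Str { Str(const char *) {} };
Str(const char *) -> Str<char>; // non-template explicit guide
template <typename U> using AStr = Str<U>;
inline void demoClonedNonTemplateGuide() {
  AStr s("hi"); // uses the cloned guide: Str<char>
  static_assert(std::is_same_v<decltype(s), Str<char>>);
}
// ---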
auto *Constraint = buildIsDeducibleConstraint( SemaRef, AliasTemplate, Transformed->getReturnType(), {}); if (auto *RC = DG->getTrailingRequiresClause()) { auto Conjunction = SemaRef.BuildBinOp(SemaRef.getCurScope(), SourceLocation{}, BinaryOperatorKind::BO_LAnd, RC, Constraint); if (!Conjunction.isInvalid()) Constraint = Conjunction.getAs(); } Transformed->setTrailingRequiresClause(Constraint); } FunctionTemplateDecl *F = dyn_cast(G); if (!F) continue; // The **aggregate** deduction guides are handled in a different code path // (DeclareAggregateDeductionGuideFromInitList), which involves the tricky // cache. if (cast(F->getTemplatedDecl()) ->getDeductionCandidateKind() == DeductionCandidate::Aggregate) continue; BuildDeductionGuideForTypeAlias(SemaRef, AliasTemplate, F, Loc); } } // Build an aggregate deduction guide for a type alias template. FunctionTemplateDecl *DeclareAggregateDeductionGuideForTypeAlias( Sema &SemaRef, TypeAliasTemplateDecl *AliasTemplate, MutableArrayRef ParamTypes, SourceLocation Loc) { TemplateDecl *RHSTemplate = getRHSTemplateDeclAndArgs(SemaRef, AliasTemplate).first; if (!RHSTemplate) return nullptr; + + llvm::SmallVector TypedefDecls; + llvm::SmallVector NewParamTypes; + ExtractTypeForDeductionGuide TypeAliasTransformer(SemaRef, TypedefDecls); + for (QualType P : ParamTypes) { + QualType Type = TypeAliasTransformer.TransformType(P); + if (Type.isNull()) + return nullptr; + NewParamTypes.push_back(Type); + } + auto *RHSDeductionGuide = SemaRef.DeclareAggregateDeductionGuideFromInitList( - RHSTemplate, ParamTypes, Loc); + RHSTemplate, NewParamTypes, Loc); if (!RHSDeductionGuide) return nullptr; + + for (TypedefNameDecl *TD : TypedefDecls) + TD->setDeclContext(RHSDeductionGuide->getTemplatedDecl()); + return BuildDeductionGuideForTypeAlias(SemaRef, AliasTemplate, RHSDeductionGuide, Loc); } } // namespace FunctionTemplateDecl *Sema::DeclareAggregateDeductionGuideFromInitList( TemplateDecl *Template, MutableArrayRef ParamTypes, SourceLocation Loc) { llvm::FoldingSetNodeID ID; ID.AddPointer(Template); for (auto &T : ParamTypes) T.getCanonicalType().Profile(ID); unsigned Hash = ID.ComputeHash(); auto Found = AggregateDeductionCandidates.find(Hash); if (Found != AggregateDeductionCandidates.end()) { CXXDeductionGuideDecl *GD = Found->getSecond(); return GD->getDescribedFunctionTemplate(); } if (auto *AliasTemplate = llvm::dyn_cast(Template)) { if (auto *FTD = DeclareAggregateDeductionGuideForTypeAlias( *this, AliasTemplate, ParamTypes, Loc)) { auto *GD = cast(FTD->getTemplatedDecl()); GD->setDeductionCandidateKind(DeductionCandidate::Aggregate); AggregateDeductionCandidates[Hash] = GD; return FTD; } } if (CXXRecordDecl *DefRecord = cast(Template->getTemplatedDecl())->getDefinition()) { if (TemplateDecl *DescribedTemplate = DefRecord->getDescribedClassTemplate()) Template = DescribedTemplate; } DeclContext *DC = Template->getDeclContext(); if (DC->isDependentContext()) return nullptr; ConvertConstructorToDeductionGuideTransform Transform( *this, cast(Template)); if (!isCompleteType(Loc, Transform.DeducedType)) return nullptr; // In case we were expanding a pack when we attempted to declare deduction // guides, turn off pack expansion for everything we're about to do. ArgumentPackSubstitutionIndexRAII SubstIndex(*this, /*NewSubstitutionIndex=*/-1); // Create a template instantiation record to track the "instantiation" of // constructors into deduction guides. 
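// --- Illustrative aside: a user-level call that reaches the aggregate path
// above, i.e. class template argument deduction from an aggregate initializer
// going through an alias template (assuming the aggregate-through-alias
// support this code adds). `Agg` and `AAgg` are hypothetical names.
#include <type_traits>
template <typename T> struct Agg { T first; int second; };
template <typename U> using AAgg = Agg<U>;
inline void demoAggregateDeductionThroughAlias() {
  AAgg a{3.5, 1}; // aggregate deduction guide, via the alias: Agg<double>
  static_assert(std::is_same_v<decltype(a), Agg<double>>);
}
// ---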
InstantiatingTemplate BuildingDeductionGuides( *this, Loc, Template, Sema::InstantiatingTemplate::BuildingDeductionGuidesTag{}); if (BuildingDeductionGuides.isInvalid()) return nullptr; ClassTemplateDecl *Pattern = Transform.NestedPattern ? Transform.NestedPattern : Transform.Template; ContextRAII SavedContext(*this, Pattern->getTemplatedDecl()); auto *FTD = cast( Transform.buildSimpleDeductionGuide(ParamTypes)); SavedContext.pop(); auto *GD = cast(FTD->getTemplatedDecl()); GD->setDeductionCandidateKind(DeductionCandidate::Aggregate); AggregateDeductionCandidates[Hash] = GD; return FTD; } void Sema::DeclareImplicitDeductionGuides(TemplateDecl *Template, SourceLocation Loc) { if (auto *AliasTemplate = llvm::dyn_cast(Template)) { DeclareImplicitDeductionGuidesForTypeAlias(*this, AliasTemplate, Loc); return; } if (CXXRecordDecl *DefRecord = cast(Template->getTemplatedDecl())->getDefinition()) { if (TemplateDecl *DescribedTemplate = DefRecord->getDescribedClassTemplate()) Template = DescribedTemplate; } DeclContext *DC = Template->getDeclContext(); if (DC->isDependentContext()) return; ConvertConstructorToDeductionGuideTransform Transform( *this, cast(Template)); if (!isCompleteType(Loc, Transform.DeducedType)) return; if (hasDeclaredDeductionGuides(Transform.DeductionGuideName, DC)) return; // In case we were expanding a pack when we attempted to declare deduction // guides, turn off pack expansion for everything we're about to do. ArgumentPackSubstitutionIndexRAII SubstIndex(*this, -1); // Create a template instantiation record to track the "instantiation" of // constructors into deduction guides. InstantiatingTemplate BuildingDeductionGuides( *this, Loc, Template, Sema::InstantiatingTemplate::BuildingDeductionGuidesTag{}); if (BuildingDeductionGuides.isInvalid()) return; // Convert declared constructors into deduction guide templates. // FIXME: Skip constructors for which deduction must necessarily fail (those // for which some class template parameter without a default argument never // appears in a deduced context). ClassTemplateDecl *Pattern = Transform.NestedPattern ? Transform.NestedPattern : Transform.Template; ContextRAII SavedContext(*this, Pattern->getTemplatedDecl()); llvm::SmallPtrSet ProcessedCtors; bool AddedAny = false; for (NamedDecl *D : LookupConstructors(Pattern->getTemplatedDecl())) { D = D->getUnderlyingDecl(); if (D->isInvalidDecl() || D->isImplicit()) continue; D = cast(D->getCanonicalDecl()); // Within C++20 modules, we may have multiple same constructors in // multiple same RecordDecls. And it doesn't make sense to create // duplicated deduction guides for the duplicated constructors. if (ProcessedCtors.count(D)) continue; auto *FTD = dyn_cast(D); auto *CD = dyn_cast_or_null(FTD ? FTD->getTemplatedDecl() : D); // Class-scope explicit specializations (MS extension) do not result in // deduction guides. if (!CD || (!FTD && CD->isFunctionTemplateSpecialization())) continue; // Cannot make a deduction guide when unparsed arguments are present. if (llvm::any_of(CD->parameters(), [](ParmVarDecl *P) { return !P || P->hasUnparsedDefaultArg(); })) continue; ProcessedCtors.insert(D); Transform.transformConstructor(FTD, CD); AddedAny = true; } // C++17 [over.match.class.deduct] // -- If C is not defined or does not declare any constructors, an // additional function template derived as above from a hypothetical // constructor C(). 
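// --- Illustrative aside: the observable effect of the two candidates
// described above. Even without user-declared constructors, overload
// resolution for deduction sees a candidate derived from C() and the copy
// candidate derived from C(C). `Empty` is a hypothetical name.
#include <type_traits>
template <typename T> struct Empty {};
inline void demoCopyDeductionCandidate() {
  Empty<int> source;
  Empty copy = source; // copy deduction candidate: Empty<int>
  static_assert(std::is_same_v<decltype(copy), Empty<int>>);
  // Empty def;        // ill-formed: the C() candidate alone cannot deduce T.
}
// ---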
if (!AddedAny) Transform.buildSimpleDeductionGuide(std::nullopt); // -- An additional function template derived as above from a hypothetical // constructor C(C), called the copy deduction candidate. cast( cast( Transform.buildSimpleDeductionGuide(Transform.DeducedType)) ->getTemplatedDecl()) ->setDeductionCandidateKind(DeductionCandidate::Copy); SavedContext.pop(); } diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp index c118f3818467..154acdfbe032 100644 --- a/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/contrib/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp @@ -1,4808 +1,4809 @@ //===- ASTReaderDecl.cpp - Decl Deserialization ---------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the ASTReader::readDeclRecord method, which is the // entrypoint for loading a decl. // //===----------------------------------------------------------------------===// #include "ASTCommon.h" #include "ASTReaderInternals.h" #include "clang/AST/ASTConcept.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTStructuralEquivalence.h" #include "clang/AST/Attr.h" #include "clang/AST/AttrIterator.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclVisitor.h" #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/ExternalASTSource.h" #include "clang/AST/LambdaCapture.h" #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/Redeclarable.h" #include "clang/AST/Stmt.h" #include "clang/AST/TemplateBase.h" #include "clang/AST/Type.h" #include "clang/AST/UnresolvedSet.h" #include "clang/Basic/AttrKinds.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/ExceptionSpecificationType.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/Lambda.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/Linkage.h" #include "clang/Basic/Module.h" #include "clang/Basic/PragmaKinds.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" #include "clang/Basic/Stack.h" #include "clang/Sema/IdentifierResolver.h" #include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ASTRecordReader.h" #include "clang/Serialization/ContinuousRangeMap.h" #include "clang/Serialization/ModuleFile.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Bitstream/BitstreamReader.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SaveAndRestore.h" #include #include #include #include #include #include using namespace clang; using namespace serialization; //===----------------------------------------------------------------------===// // Declaration deserialization //===----------------------------------------------------------------------===// namespace clang { 
class ASTDeclReader : public DeclVisitor { ASTReader &Reader; ASTRecordReader &Record; ASTReader::RecordLocation Loc; const GlobalDeclID ThisDeclID; const SourceLocation ThisDeclLoc; using RecordData = ASTReader::RecordData; TypeID DeferredTypeID = 0; unsigned AnonymousDeclNumber = 0; GlobalDeclID NamedDeclForTagDecl = GlobalDeclID(); IdentifierInfo *TypedefNameForLinkage = nullptr; ///A flag to carry the information for a decl from the entity is /// used. We use it to delay the marking of the canonical decl as used until /// the entire declaration is deserialized and merged. bool IsDeclMarkedUsed = false; uint64_t GetCurrentCursorOffset(); uint64_t ReadLocalOffset() { uint64_t LocalOffset = Record.readInt(); assert(LocalOffset < Loc.Offset && "offset point after current record"); return LocalOffset ? Loc.Offset - LocalOffset : 0; } uint64_t ReadGlobalOffset() { uint64_t Local = ReadLocalOffset(); return Local ? Record.getGlobalBitOffset(Local) : 0; } SourceLocation readSourceLocation() { return Record.readSourceLocation(); } SourceRange readSourceRange() { return Record.readSourceRange(); } TypeSourceInfo *readTypeSourceInfo() { return Record.readTypeSourceInfo(); } GlobalDeclID readDeclID() { return Record.readDeclID(); } std::string readString() { return Record.readString(); } void readDeclIDList(SmallVectorImpl &IDs) { for (unsigned I = 0, Size = Record.readInt(); I != Size; ++I) IDs.push_back(readDeclID()); } Decl *readDecl() { return Record.readDecl(); } template T *readDeclAs() { return Record.readDeclAs(); } serialization::SubmoduleID readSubmoduleID() { if (Record.getIdx() == Record.size()) return 0; return Record.getGlobalSubmoduleID(Record.readInt()); } Module *readModule() { return Record.getSubmodule(readSubmoduleID()); } void ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update, Decl *LambdaContext = nullptr, unsigned IndexInLambdaContext = 0); void ReadCXXDefinitionData(struct CXXRecordDecl::DefinitionData &Data, const CXXRecordDecl *D, Decl *LambdaContext, unsigned IndexInLambdaContext); void MergeDefinitionData(CXXRecordDecl *D, struct CXXRecordDecl::DefinitionData &&NewDD); void ReadObjCDefinitionData(struct ObjCInterfaceDecl::DefinitionData &Data); void MergeDefinitionData(ObjCInterfaceDecl *D, struct ObjCInterfaceDecl::DefinitionData &&NewDD); void ReadObjCDefinitionData(struct ObjCProtocolDecl::DefinitionData &Data); void MergeDefinitionData(ObjCProtocolDecl *D, struct ObjCProtocolDecl::DefinitionData &&NewDD); static DeclContext *getPrimaryDCForAnonymousDecl(DeclContext *LexicalDC); static NamedDecl *getAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index); static void setAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index, NamedDecl *D); /// Commit to a primary definition of the class RD, which is known to be /// a definition of the class. We might not have read the definition data /// for it yet. If we haven't then allocate placeholder definition data /// now too. static CXXRecordDecl *getOrFakePrimaryClassDefinition(ASTReader &Reader, CXXRecordDecl *RD); /// Results from loading a RedeclarableDecl. class RedeclarableResult { Decl *MergeWith; GlobalDeclID FirstID; bool IsKeyDecl; public: RedeclarableResult(Decl *MergeWith, GlobalDeclID FirstID, bool IsKeyDecl) : MergeWith(MergeWith), FirstID(FirstID), IsKeyDecl(IsKeyDecl) {} /// Retrieve the first ID. GlobalDeclID getFirstID() const { return FirstID; } /// Is this declaration a key declaration? 
bool isKeyDecl() const { return IsKeyDecl; } /// Get a known declaration that this should be merged with, if /// any. Decl *getKnownMergeTarget() const { return MergeWith; } }; /// Class used to capture the result of searching for an existing /// declaration of a specific kind and name, along with the ability /// to update the place where this result was found (the declaration /// chain hanging off an identifier or the DeclContext we searched in) /// if requested. class FindExistingResult { ASTReader &Reader; NamedDecl *New = nullptr; NamedDecl *Existing = nullptr; bool AddResult = false; unsigned AnonymousDeclNumber = 0; IdentifierInfo *TypedefNameForLinkage = nullptr; public: FindExistingResult(ASTReader &Reader) : Reader(Reader) {} FindExistingResult(ASTReader &Reader, NamedDecl *New, NamedDecl *Existing, unsigned AnonymousDeclNumber, IdentifierInfo *TypedefNameForLinkage) : Reader(Reader), New(New), Existing(Existing), AddResult(true), AnonymousDeclNumber(AnonymousDeclNumber), TypedefNameForLinkage(TypedefNameForLinkage) {} FindExistingResult(FindExistingResult &&Other) : Reader(Other.Reader), New(Other.New), Existing(Other.Existing), AddResult(Other.AddResult), AnonymousDeclNumber(Other.AnonymousDeclNumber), TypedefNameForLinkage(Other.TypedefNameForLinkage) { Other.AddResult = false; } FindExistingResult &operator=(FindExistingResult &&) = delete; ~FindExistingResult(); /// Suppress the addition of this result into the known set of /// names. void suppress() { AddResult = false; } operator NamedDecl*() const { return Existing; } template operator T*() const { return dyn_cast_or_null(Existing); } }; static DeclContext *getPrimaryContextForMerging(ASTReader &Reader, DeclContext *DC); FindExistingResult findExisting(NamedDecl *D); public: ASTDeclReader(ASTReader &Reader, ASTRecordReader &Record, ASTReader::RecordLocation Loc, GlobalDeclID thisDeclID, SourceLocation ThisDeclLoc) : Reader(Reader), Record(Record), Loc(Loc), ThisDeclID(thisDeclID), ThisDeclLoc(ThisDeclLoc) {} template static void AddLazySpecializations(T *D, SmallVectorImpl &IDs) { if (IDs.empty()) return; // FIXME: We should avoid this pattern of getting the ASTContext. 
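// --- Illustrative aside: a standalone sketch (hypothetical types, not the
// clang API) of the bookkeeping performed just below -- lazily loaded
// specialization IDs live in a length-prefixed array, and newly read IDs are
// merged in as a sorted, de-duplicated list.
#include <algorithm>
#include <cstdint>
#include <vector>
using RawDeclID = std::uint64_t;
inline std::vector<RawDeclID> mergeLazyIDs(const RawDeclID *LengthPrefixed,
                                           std::vector<RawDeclID> NewIDs) {
  if (LengthPrefixed) // old list: element 0 is the count, IDs follow
    NewIDs.insert(NewIDs.end(), LengthPrefixed + 1,
                  LengthPrefixed + 1 + LengthPrefixed[0]);
  std::sort(NewIDs.begin(), NewIDs.end());
  NewIDs.erase(std::unique(NewIDs.begin(), NewIDs.end()), NewIDs.end());
  return NewIDs; // the caller writes the new count back in front
}
// ---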
ASTContext &C = D->getASTContext(); auto *&LazySpecializations = D->getCommonPtr()->LazySpecializations; if (auto &Old = LazySpecializations) { IDs.insert(IDs.end(), Old + 1, Old + 1 + Old[0].getRawValue()); llvm::sort(IDs); IDs.erase(std::unique(IDs.begin(), IDs.end()), IDs.end()); } auto *Result = new (C) GlobalDeclID[1 + IDs.size()]; *Result = GlobalDeclID(IDs.size()); std::copy(IDs.begin(), IDs.end(), Result + 1); LazySpecializations = Result; } template static Decl *getMostRecentDeclImpl(Redeclarable *D); static Decl *getMostRecentDeclImpl(...); static Decl *getMostRecentDecl(Decl *D); static void mergeInheritableAttributes(ASTReader &Reader, Decl *D, Decl *Previous); template static void attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon); static void attachPreviousDeclImpl(ASTReader &Reader, ...); static void attachPreviousDecl(ASTReader &Reader, Decl *D, Decl *Previous, Decl *Canon); template static void attachLatestDeclImpl(Redeclarable *D, Decl *Latest); static void attachLatestDeclImpl(...); static void attachLatestDecl(Decl *D, Decl *latest); template static void markIncompleteDeclChainImpl(Redeclarable *D); static void markIncompleteDeclChainImpl(...); void ReadFunctionDefinition(FunctionDecl *FD); void Visit(Decl *D); void UpdateDecl(Decl *D, SmallVectorImpl &); static void setNextObjCCategory(ObjCCategoryDecl *Cat, ObjCCategoryDecl *Next) { Cat->NextClassCategory = Next; } void VisitDecl(Decl *D); void VisitPragmaCommentDecl(PragmaCommentDecl *D); void VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D); void VisitTranslationUnitDecl(TranslationUnitDecl *TU); void VisitNamedDecl(NamedDecl *ND); void VisitLabelDecl(LabelDecl *LD); void VisitNamespaceDecl(NamespaceDecl *D); void VisitHLSLBufferDecl(HLSLBufferDecl *D); void VisitUsingDirectiveDecl(UsingDirectiveDecl *D); void VisitNamespaceAliasDecl(NamespaceAliasDecl *D); void VisitTypeDecl(TypeDecl *TD); RedeclarableResult VisitTypedefNameDecl(TypedefNameDecl *TD); void VisitTypedefDecl(TypedefDecl *TD); void VisitTypeAliasDecl(TypeAliasDecl *TD); void VisitUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *D); void VisitUnresolvedUsingIfExistsDecl(UnresolvedUsingIfExistsDecl *D); RedeclarableResult VisitTagDecl(TagDecl *TD); void VisitEnumDecl(EnumDecl *ED); RedeclarableResult VisitRecordDeclImpl(RecordDecl *RD); void VisitRecordDecl(RecordDecl *RD); RedeclarableResult VisitCXXRecordDeclImpl(CXXRecordDecl *D); void VisitCXXRecordDecl(CXXRecordDecl *D) { VisitCXXRecordDeclImpl(D); } RedeclarableResult VisitClassTemplateSpecializationDeclImpl( ClassTemplateSpecializationDecl *D); void VisitClassTemplateSpecializationDecl( ClassTemplateSpecializationDecl *D) { VisitClassTemplateSpecializationDeclImpl(D); } void VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D); RedeclarableResult VisitVarTemplateSpecializationDeclImpl(VarTemplateSpecializationDecl *D); void VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D) { VisitVarTemplateSpecializationDeclImpl(D); } void VisitVarTemplatePartialSpecializationDecl( VarTemplatePartialSpecializationDecl *D); void VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D); void VisitValueDecl(ValueDecl *VD); void VisitEnumConstantDecl(EnumConstantDecl *ECD); void VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D); void VisitDeclaratorDecl(DeclaratorDecl *DD); void VisitFunctionDecl(FunctionDecl *FD); void VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *GD); void 
VisitCXXMethodDecl(CXXMethodDecl *D); void VisitCXXConstructorDecl(CXXConstructorDecl *D); void VisitCXXDestructorDecl(CXXDestructorDecl *D); void VisitCXXConversionDecl(CXXConversionDecl *D); void VisitFieldDecl(FieldDecl *FD); void VisitMSPropertyDecl(MSPropertyDecl *FD); void VisitMSGuidDecl(MSGuidDecl *D); void VisitUnnamedGlobalConstantDecl(UnnamedGlobalConstantDecl *D); void VisitTemplateParamObjectDecl(TemplateParamObjectDecl *D); void VisitIndirectFieldDecl(IndirectFieldDecl *FD); RedeclarableResult VisitVarDeclImpl(VarDecl *D); void ReadVarDeclInit(VarDecl *VD); void VisitVarDecl(VarDecl *VD) { VisitVarDeclImpl(VD); } void VisitImplicitParamDecl(ImplicitParamDecl *PD); void VisitParmVarDecl(ParmVarDecl *PD); void VisitDecompositionDecl(DecompositionDecl *DD); void VisitBindingDecl(BindingDecl *BD); void VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D); void VisitTemplateDecl(TemplateDecl *D); void VisitConceptDecl(ConceptDecl *D); void VisitImplicitConceptSpecializationDecl( ImplicitConceptSpecializationDecl *D); void VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D); RedeclarableResult VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D); void VisitClassTemplateDecl(ClassTemplateDecl *D); void VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D); void VisitVarTemplateDecl(VarTemplateDecl *D); void VisitFunctionTemplateDecl(FunctionTemplateDecl *D); void VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D); void VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D); void VisitUsingDecl(UsingDecl *D); void VisitUsingEnumDecl(UsingEnumDecl *D); void VisitUsingPackDecl(UsingPackDecl *D); void VisitUsingShadowDecl(UsingShadowDecl *D); void VisitConstructorUsingShadowDecl(ConstructorUsingShadowDecl *D); void VisitLinkageSpecDecl(LinkageSpecDecl *D); void VisitExportDecl(ExportDecl *D); void VisitFileScopeAsmDecl(FileScopeAsmDecl *AD); void VisitTopLevelStmtDecl(TopLevelStmtDecl *D); void VisitImportDecl(ImportDecl *D); void VisitAccessSpecDecl(AccessSpecDecl *D); void VisitFriendDecl(FriendDecl *D); void VisitFriendTemplateDecl(FriendTemplateDecl *D); void VisitStaticAssertDecl(StaticAssertDecl *D); void VisitBlockDecl(BlockDecl *BD); void VisitCapturedDecl(CapturedDecl *CD); void VisitEmptyDecl(EmptyDecl *D); void VisitLifetimeExtendedTemporaryDecl(LifetimeExtendedTemporaryDecl *D); std::pair VisitDeclContext(DeclContext *DC); template RedeclarableResult VisitRedeclarable(Redeclarable *D); template void mergeRedeclarable(Redeclarable *D, RedeclarableResult &Redecl); void mergeLambda(CXXRecordDecl *D, RedeclarableResult &Redecl, Decl *Context, unsigned Number); void mergeRedeclarableTemplate(RedeclarableTemplateDecl *D, RedeclarableResult &Redecl); template void mergeRedeclarable(Redeclarable *D, T *Existing, RedeclarableResult &Redecl); template void mergeMergeable(Mergeable *D); void mergeMergeable(LifetimeExtendedTemporaryDecl *D); void mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl *Existing, bool IsKeyDecl); ObjCTypeParamList *ReadObjCTypeParamList(); // FIXME: Reorder according to DeclNodes.td? 
void VisitObjCMethodDecl(ObjCMethodDecl *D); void VisitObjCTypeParamDecl(ObjCTypeParamDecl *D); void VisitObjCContainerDecl(ObjCContainerDecl *D); void VisitObjCInterfaceDecl(ObjCInterfaceDecl *D); void VisitObjCIvarDecl(ObjCIvarDecl *D); void VisitObjCProtocolDecl(ObjCProtocolDecl *D); void VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D); void VisitObjCCategoryDecl(ObjCCategoryDecl *D); void VisitObjCImplDecl(ObjCImplDecl *D); void VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D); void VisitObjCImplementationDecl(ObjCImplementationDecl *D); void VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *D); void VisitObjCPropertyDecl(ObjCPropertyDecl *D); void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D); void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D); void VisitOMPAllocateDecl(OMPAllocateDecl *D); void VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D); void VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D); void VisitOMPRequiresDecl(OMPRequiresDecl *D); void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D); }; } // namespace clang namespace { /// Iterator over the redeclarations of a declaration that have already /// been merged into the same redeclaration chain. template class MergedRedeclIterator { DeclT *Start = nullptr; DeclT *Canonical = nullptr; DeclT *Current = nullptr; public: MergedRedeclIterator() = default; MergedRedeclIterator(DeclT *Start) : Start(Start), Current(Start) {} DeclT *operator*() { return Current; } MergedRedeclIterator &operator++() { if (Current->isFirstDecl()) { Canonical = Current; Current = Current->getMostRecentDecl(); } else Current = Current->getPreviousDecl(); // If we started in the merged portion, we'll reach our start position // eventually. Otherwise, we'll never reach it, but the second declaration // we reached was the canonical declaration, so stop when we see that one // again. if (Current == Start || Current == Canonical) Current = nullptr; return *this; } friend bool operator!=(const MergedRedeclIterator &A, const MergedRedeclIterator &B) { return A.Current != B.Current; } }; } // namespace template static llvm::iterator_range> merged_redecls(DeclT *D) { return llvm::make_range(MergedRedeclIterator(D), MergedRedeclIterator()); } uint64_t ASTDeclReader::GetCurrentCursorOffset() { return Loc.F->DeclsCursor.GetCurrentBitNo() + Loc.F->GlobalBitOffset; } void ASTDeclReader::ReadFunctionDefinition(FunctionDecl *FD) { if (Record.readInt()) { Reader.DefinitionSource[FD] = Loc.F->Kind == ModuleKind::MK_MainFile || Reader.getContext().getLangOpts().BuildingPCHWithObjectFile; } if (auto *CD = dyn_cast(FD)) { CD->setNumCtorInitializers(Record.readInt()); if (CD->getNumCtorInitializers()) CD->CtorInitializers = ReadGlobalOffset(); } // Store the offset of the body so we can lazily load it later. Reader.PendingBodies[FD] = GetCurrentCursorOffset(); } void ASTDeclReader::Visit(Decl *D) { DeclVisitor::Visit(D); // At this point we have deserialized and merged the decl and it is safe to // update its canonical decl to signal that the entire entity is used. D->getCanonicalDecl()->Used |= IsDeclMarkedUsed; IsDeclMarkedUsed = false; if (auto *DD = dyn_cast(D)) { if (auto *TInfo = DD->getTypeSourceInfo()) Record.readTypeLoc(TInfo->getTypeLoc()); } if (auto *TD = dyn_cast(D)) { // We have a fully initialized TypeDecl. Read its type now. TD->setTypeForDecl(Reader.GetType(DeferredTypeID).getTypePtrOrNull()); // If this is a tag declaration with a typedef name for linkage, it's safe // to load that typedef now. 
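// --- Illustrative aside: the user-level shape of a "typedef name for
// linkage". The unnamed struct below has no tag name of its own; the typedef
// name is what gives it linkage, which is why the tag and that typedef are
// loaded together. `LinkedByTypedef` is a hypothetical name.
typedef struct {
  int value;
} LinkedByTypedef;
// ---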
if (NamedDeclForTagDecl.isValid()) cast(D)->TypedefNameDeclOrQualifier = cast(Reader.GetDecl(NamedDeclForTagDecl)); } else if (auto *ID = dyn_cast(D)) { // if we have a fully initialized TypeDecl, we can safely read its type now. ID->TypeForDecl = Reader.GetType(DeferredTypeID).getTypePtrOrNull(); } else if (auto *FD = dyn_cast(D)) { // FunctionDecl's body was written last after all other Stmts/Exprs. if (Record.readInt()) ReadFunctionDefinition(FD); } else if (auto *VD = dyn_cast(D)) { ReadVarDeclInit(VD); } else if (auto *FD = dyn_cast(D)) { if (FD->hasInClassInitializer() && Record.readInt()) { FD->setLazyInClassInitializer(LazyDeclStmtPtr(GetCurrentCursorOffset())); } } } void ASTDeclReader::VisitDecl(Decl *D) { BitsUnpacker DeclBits(Record.readInt()); auto ModuleOwnership = (Decl::ModuleOwnershipKind)DeclBits.getNextBits(/*Width=*/3); D->setReferenced(DeclBits.getNextBit()); D->Used = DeclBits.getNextBit(); IsDeclMarkedUsed |= D->Used; D->setAccess((AccessSpecifier)DeclBits.getNextBits(/*Width=*/2)); D->setImplicit(DeclBits.getNextBit()); bool HasStandaloneLexicalDC = DeclBits.getNextBit(); bool HasAttrs = DeclBits.getNextBit(); D->setTopLevelDeclInObjCContainer(DeclBits.getNextBit()); D->InvalidDecl = DeclBits.getNextBit(); D->FromASTFile = true; if (D->isTemplateParameter() || D->isTemplateParameterPack() || isa(D)) { // We don't want to deserialize the DeclContext of a template // parameter or of a parameter of a function template immediately. These // entities might be used in the formulation of its DeclContext (for // example, a function parameter can be used in decltype() in trailing // return type of the function). Use the translation unit DeclContext as a // placeholder. GlobalDeclID SemaDCIDForTemplateParmDecl = readDeclID(); GlobalDeclID LexicalDCIDForTemplateParmDecl = HasStandaloneLexicalDC ? readDeclID() : GlobalDeclID(); if (LexicalDCIDForTemplateParmDecl.isInvalid()) LexicalDCIDForTemplateParmDecl = SemaDCIDForTemplateParmDecl; Reader.addPendingDeclContextInfo(D, SemaDCIDForTemplateParmDecl, LexicalDCIDForTemplateParmDecl); D->setDeclContext(Reader.getContext().getTranslationUnitDecl()); } else { auto *SemaDC = readDeclAs(); auto *LexicalDC = HasStandaloneLexicalDC ? readDeclAs() : nullptr; if (!LexicalDC) LexicalDC = SemaDC; // If the context is a class, we might not have actually merged it yet, in // the case where the definition comes from an update record. DeclContext *MergedSemaDC; if (auto *RD = dyn_cast(SemaDC)) MergedSemaDC = getOrFakePrimaryClassDefinition(Reader, RD); else MergedSemaDC = Reader.MergedDeclContexts.lookup(SemaDC); // Avoid calling setLexicalDeclContext() directly because it uses // Decl::getASTContext() internally which is unsafe during derialization. D->setDeclContextsImpl(MergedSemaDC ? MergedSemaDC : SemaDC, LexicalDC, Reader.getContext()); } D->setLocation(ThisDeclLoc); if (HasAttrs) { AttrVec Attrs; Record.readAttributes(Attrs); // Avoid calling setAttrs() directly because it uses Decl::getASTContext() // internally which is unsafe during derialization. D->setAttrsImpl(Attrs, Reader.getContext()); } // Determine whether this declaration is part of a (sub)module. If so, it // may not yet be visible. 
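// --- Illustrative aside: the user-level situation motivating the deferred
// DeclContext above -- the parameter `n` participates in the declaration's
// own type via decltype in the trailing return type, so its context cannot be
// demanded while the surrounding function is still being read. `twice` is a
// hypothetical name.
template <typename T>
auto twice(T n) -> decltype(n + n) {
  return n + n;
}
// ---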
bool ModulePrivate = (ModuleOwnership == Decl::ModuleOwnershipKind::ModulePrivate); if (unsigned SubmoduleID = readSubmoduleID()) { switch (ModuleOwnership) { case Decl::ModuleOwnershipKind::Visible: ModuleOwnership = Decl::ModuleOwnershipKind::VisibleWhenImported; break; case Decl::ModuleOwnershipKind::Unowned: case Decl::ModuleOwnershipKind::VisibleWhenImported: case Decl::ModuleOwnershipKind::ReachableWhenImported: case Decl::ModuleOwnershipKind::ModulePrivate: break; } D->setModuleOwnershipKind(ModuleOwnership); // Store the owning submodule ID in the declaration. D->setOwningModuleID(SubmoduleID); if (ModulePrivate) { // Module-private declarations are never visible, so there is no work to // do. } else if (Reader.getContext().getLangOpts().ModulesLocalVisibility) { // If local visibility is being tracked, this declaration will become // hidden and visible as the owning module does. } else if (Module *Owner = Reader.getSubmodule(SubmoduleID)) { // Mark the declaration as visible when its owning module becomes visible. if (Owner->NameVisibility == Module::AllVisible) D->setVisibleDespiteOwningModule(); else Reader.HiddenNamesMap[Owner].push_back(D); } } else if (ModulePrivate) { D->setModuleOwnershipKind(Decl::ModuleOwnershipKind::ModulePrivate); } } void ASTDeclReader::VisitPragmaCommentDecl(PragmaCommentDecl *D) { VisitDecl(D); D->setLocation(readSourceLocation()); D->CommentKind = (PragmaMSCommentKind)Record.readInt(); std::string Arg = readString(); memcpy(D->getTrailingObjects(), Arg.data(), Arg.size()); D->getTrailingObjects()[Arg.size()] = '\0'; } void ASTDeclReader::VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D) { VisitDecl(D); D->setLocation(readSourceLocation()); std::string Name = readString(); memcpy(D->getTrailingObjects(), Name.data(), Name.size()); D->getTrailingObjects()[Name.size()] = '\0'; D->ValueStart = Name.size() + 1; std::string Value = readString(); memcpy(D->getTrailingObjects() + D->ValueStart, Value.data(), Value.size()); D->getTrailingObjects()[D->ValueStart + Value.size()] = '\0'; } void ASTDeclReader::VisitTranslationUnitDecl(TranslationUnitDecl *TU) { llvm_unreachable("Translation units are not serialized"); } void ASTDeclReader::VisitNamedDecl(NamedDecl *ND) { VisitDecl(ND); ND->setDeclName(Record.readDeclarationName()); AnonymousDeclNumber = Record.readInt(); } void ASTDeclReader::VisitTypeDecl(TypeDecl *TD) { VisitNamedDecl(TD); TD->setLocStart(readSourceLocation()); // Delay type reading until after we have fully initialized the decl. DeferredTypeID = Record.getGlobalTypeID(Record.readInt()); } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitTypedefNameDecl(TypedefNameDecl *TD) { RedeclarableResult Redecl = VisitRedeclarable(TD); VisitTypeDecl(TD); TypeSourceInfo *TInfo = readTypeSourceInfo(); if (Record.readInt()) { // isModed QualType modedT = Record.readType(); TD->setModedTypeSourceInfo(TInfo, modedT); } else TD->setTypeSourceInfo(TInfo); // Read and discard the declaration for which this is a typedef name for // linkage, if it exists. We cannot rely on our type to pull in this decl, // because it might have been merged with a type from another module and // thus might not refer to our version of the declaration. 
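// --- Illustrative aside: a standalone sketch (hypothetical types, not the
// clang API) of the visibility bookkeeping above -- a declaration owned by a
// module that is not yet fully visible is parked per owner and only surfaced
// once that owner becomes visible.
#include <unordered_map>
#include <vector>
struct OwnerModule { bool AllVisible = false; };
struct DeclStub { bool VisibleDespiteOwner = false; };
struct HiddenNames {
  std::unordered_map<const OwnerModule *, std::vector<DeclStub *>> PerOwner;
  void noteOwned(const OwnerModule &M, DeclStub &D) {
    if (M.AllVisible)
      D.VisibleDespiteOwner = true; // owner already visible: expose now
    else
      PerOwner[&M].push_back(&D);   // otherwise reveal when M is imported
  }
};
// ---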
readDecl(); return Redecl; } void ASTDeclReader::VisitTypedefDecl(TypedefDecl *TD) { RedeclarableResult Redecl = VisitTypedefNameDecl(TD); mergeRedeclarable(TD, Redecl); } void ASTDeclReader::VisitTypeAliasDecl(TypeAliasDecl *TD) { RedeclarableResult Redecl = VisitTypedefNameDecl(TD); if (auto *Template = readDeclAs()) // Merged when we merge the template. TD->setDescribedAliasTemplate(Template); else mergeRedeclarable(TD, Redecl); } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitTagDecl(TagDecl *TD) { RedeclarableResult Redecl = VisitRedeclarable(TD); VisitTypeDecl(TD); TD->IdentifierNamespace = Record.readInt(); BitsUnpacker TagDeclBits(Record.readInt()); TD->setTagKind( static_cast(TagDeclBits.getNextBits(/*Width=*/3))); TD->setCompleteDefinition(TagDeclBits.getNextBit()); TD->setEmbeddedInDeclarator(TagDeclBits.getNextBit()); TD->setFreeStanding(TagDeclBits.getNextBit()); TD->setCompleteDefinitionRequired(TagDeclBits.getNextBit()); TD->setBraceRange(readSourceRange()); switch (TagDeclBits.getNextBits(/*Width=*/2)) { case 0: break; case 1: { // ExtInfo auto *Info = new (Reader.getContext()) TagDecl::ExtInfo(); Record.readQualifierInfo(*Info); TD->TypedefNameDeclOrQualifier = Info; break; } case 2: // TypedefNameForAnonDecl NamedDeclForTagDecl = readDeclID(); TypedefNameForLinkage = Record.readIdentifier(); break; default: llvm_unreachable("unexpected tag info kind"); } if (!isa(TD)) mergeRedeclarable(TD, Redecl); return Redecl; } void ASTDeclReader::VisitEnumDecl(EnumDecl *ED) { VisitTagDecl(ED); if (TypeSourceInfo *TI = readTypeSourceInfo()) ED->setIntegerTypeSourceInfo(TI); else ED->setIntegerType(Record.readType()); ED->setPromotionType(Record.readType()); BitsUnpacker EnumDeclBits(Record.readInt()); ED->setNumPositiveBits(EnumDeclBits.getNextBits(/*Width=*/8)); ED->setNumNegativeBits(EnumDeclBits.getNextBits(/*Width=*/8)); ED->setScoped(EnumDeclBits.getNextBit()); ED->setScopedUsingClassTag(EnumDeclBits.getNextBit()); ED->setFixed(EnumDeclBits.getNextBit()); ED->setHasODRHash(true); ED->ODRHash = Record.readInt(); // If this is a definition subject to the ODR, and we already have a // definition, merge this one into it. if (ED->isCompleteDefinition() && Reader.getContext().getLangOpts().Modules) { EnumDecl *&OldDef = Reader.EnumDefinitions[ED->getCanonicalDecl()]; if (!OldDef) { // This is the first time we've seen an imported definition. Look for a // local definition before deciding that we are the first definition. for (auto *D : merged_redecls(ED->getCanonicalDecl())) { if (!D->isFromASTFile() && D->isCompleteDefinition()) { OldDef = D; break; } } } if (OldDef) { Reader.MergedDeclContexts.insert(std::make_pair(ED, OldDef)); ED->demoteThisDefinitionToDeclaration(); Reader.mergeDefinitionVisibility(OldDef, ED); // We don't want to check the ODR hash value for declarations from global // module fragment. 
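// --- Illustrative aside: a standalone sketch (hypothetical types, not the
// clang API) of the merging policy above -- the first complete definition seen
// for a canonical entity wins, later imported definitions are demoted, and a
// differing content hash is queued as an ODR mismatch to diagnose later.
#include <cstdint>
#include <map>
#include <vector>
struct DefinitionMerger {
  std::map<unsigned, std::uint64_t> FirstDefinitionHash; // canonical id -> hash
  std::vector<unsigned> PendingOdrFailures;
  // Returns true when this definition becomes the canonical one.
  bool merge(unsigned CanonicalId, std::uint64_t OdrHash) {
    auto [It, Inserted] = FirstDefinitionHash.try_emplace(CanonicalId, OdrHash);
    if (!Inserted && It->second != OdrHash)
      PendingOdrFailures.push_back(CanonicalId); // report after reading finishes
    return Inserted;
  }
};
// ---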
if (!shouldSkipCheckingODR(ED) && !shouldSkipCheckingODR(OldDef) && OldDef->getODRHash() != ED->getODRHash()) Reader.PendingEnumOdrMergeFailures[OldDef].push_back(ED); } else { OldDef = ED; } } if (auto *InstED = readDeclAs()) { auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); ED->setInstantiationOfMemberEnum(Reader.getContext(), InstED, TSK); ED->getMemberSpecializationInfo()->setPointOfInstantiation(POI); } } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitRecordDeclImpl(RecordDecl *RD) { RedeclarableResult Redecl = VisitTagDecl(RD); BitsUnpacker RecordDeclBits(Record.readInt()); RD->setHasFlexibleArrayMember(RecordDeclBits.getNextBit()); RD->setAnonymousStructOrUnion(RecordDeclBits.getNextBit()); RD->setHasObjectMember(RecordDeclBits.getNextBit()); RD->setHasVolatileMember(RecordDeclBits.getNextBit()); RD->setNonTrivialToPrimitiveDefaultInitialize(RecordDeclBits.getNextBit()); RD->setNonTrivialToPrimitiveCopy(RecordDeclBits.getNextBit()); RD->setNonTrivialToPrimitiveDestroy(RecordDeclBits.getNextBit()); RD->setHasNonTrivialToPrimitiveDefaultInitializeCUnion( RecordDeclBits.getNextBit()); RD->setHasNonTrivialToPrimitiveDestructCUnion(RecordDeclBits.getNextBit()); RD->setHasNonTrivialToPrimitiveCopyCUnion(RecordDeclBits.getNextBit()); RD->setParamDestroyedInCallee(RecordDeclBits.getNextBit()); RD->setArgPassingRestrictions( (RecordArgPassingKind)RecordDeclBits.getNextBits(/*Width=*/2)); return Redecl; } void ASTDeclReader::VisitRecordDecl(RecordDecl *RD) { VisitRecordDeclImpl(RD); RD->setODRHash(Record.readInt()); // Maintain the invariant of a redeclaration chain containing only // a single definition. if (RD->isCompleteDefinition()) { RecordDecl *Canon = static_cast(RD->getCanonicalDecl()); RecordDecl *&OldDef = Reader.RecordDefinitions[Canon]; if (!OldDef) { // This is the first time we've seen an imported definition. Look for a // local definition before deciding that we are the first definition. for (auto *D : merged_redecls(Canon)) { if (!D->isFromASTFile() && D->isCompleteDefinition()) { OldDef = D; break; } } } if (OldDef) { Reader.MergedDeclContexts.insert(std::make_pair(RD, OldDef)); RD->demoteThisDefinitionToDeclaration(); Reader.mergeDefinitionVisibility(OldDef, RD); if (OldDef->getODRHash() != RD->getODRHash()) Reader.PendingRecordOdrMergeFailures[OldDef].push_back(RD); } else { OldDef = RD; } } } void ASTDeclReader::VisitValueDecl(ValueDecl *VD) { VisitNamedDecl(VD); // For function or variable declarations, defer reading the type in case the // declaration has a deduced type that references an entity declared within // the function definition or variable initializer. if (isa(VD)) DeferredTypeID = Record.getGlobalTypeID(Record.readInt()); else VD->setType(Record.readType()); } void ASTDeclReader::VisitEnumConstantDecl(EnumConstantDecl *ECD) { VisitValueDecl(ECD); if (Record.readInt()) ECD->setInitExpr(Record.readExpr()); ECD->setInitVal(Reader.getContext(), Record.readAPSInt()); mergeMergeable(ECD); } void ASTDeclReader::VisitDeclaratorDecl(DeclaratorDecl *DD) { VisitValueDecl(DD); DD->setInnerLocStart(readSourceLocation()); if (Record.readInt()) { // hasExtInfo auto *Info = new (Reader.getContext()) DeclaratorDecl::ExtInfo(); Record.readQualifierInfo(*Info); Info->TrailingRequiresClause = Record.readExpr(); DD->DeclInfo = Info; } QualType TSIType = Record.readType(); DD->setTypeSourceInfo( TSIType.isNull() ? 
nullptr : Reader.getContext().CreateTypeSourceInfo(TSIType)); } void ASTDeclReader::VisitFunctionDecl(FunctionDecl *FD) { RedeclarableResult Redecl = VisitRedeclarable(FD); FunctionDecl *Existing = nullptr; switch ((FunctionDecl::TemplatedKind)Record.readInt()) { case FunctionDecl::TK_NonTemplate: break; case FunctionDecl::TK_DependentNonTemplate: FD->setInstantiatedFromDecl(readDeclAs()); break; case FunctionDecl::TK_FunctionTemplate: { auto *Template = readDeclAs(); Template->init(FD); FD->setDescribedFunctionTemplate(Template); break; } case FunctionDecl::TK_MemberSpecialization: { auto *InstFD = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); FD->setInstantiationOfMemberFunction(Reader.getContext(), InstFD, TSK); FD->getMemberSpecializationInfo()->setPointOfInstantiation(POI); break; } case FunctionDecl::TK_FunctionTemplateSpecialization: { auto *Template = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); // Template arguments. SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true); // Template args as written. TemplateArgumentListInfo TemplArgsWritten; bool HasTemplateArgumentsAsWritten = Record.readBool(); if (HasTemplateArgumentsAsWritten) Record.readTemplateArgumentListInfo(TemplArgsWritten); SourceLocation POI = readSourceLocation(); ASTContext &C = Reader.getContext(); TemplateArgumentList *TemplArgList = TemplateArgumentList::CreateCopy(C, TemplArgs); MemberSpecializationInfo *MSInfo = nullptr; if (Record.readInt()) { auto *FD = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); MSInfo = new (C) MemberSpecializationInfo(FD, TSK); MSInfo->setPointOfInstantiation(POI); } FunctionTemplateSpecializationInfo *FTInfo = FunctionTemplateSpecializationInfo::Create( C, FD, Template, TSK, TemplArgList, HasTemplateArgumentsAsWritten ? &TemplArgsWritten : nullptr, POI, MSInfo); FD->TemplateOrSpecialization = FTInfo; if (FD->isCanonicalDecl()) { // if canonical add to template's set. // The template that contains the specializations set. It's not safe to // use getCanonicalDecl on Template since it may still be initializing. auto *CanonTemplate = readDeclAs(); // Get the InsertPos by FindNodeOrInsertPos() instead of calling // InsertNode(FTInfo) directly to avoid the getASTContext() call in // FunctionTemplateSpecializationInfo's Profile(). // We avoid getASTContext because a decl in the parent hierarchy may // be initializing. llvm::FoldingSetNodeID ID; FunctionTemplateSpecializationInfo::Profile(ID, TemplArgs, C); void *InsertPos = nullptr; FunctionTemplateDecl::Common *CommonPtr = CanonTemplate->getCommonPtr(); FunctionTemplateSpecializationInfo *ExistingInfo = CommonPtr->Specializations.FindNodeOrInsertPos(ID, InsertPos); if (InsertPos) CommonPtr->Specializations.InsertNode(FTInfo, InsertPos); else { assert(Reader.getContext().getLangOpts().Modules && "already deserialized this template specialization"); Existing = ExistingInfo->getFunction(); } } break; } case FunctionDecl::TK_DependentFunctionTemplateSpecialization: { // Templates. UnresolvedSet<8> Candidates; unsigned NumCandidates = Record.readInt(); while (NumCandidates--) Candidates.addDecl(readDeclAs()); // Templates args. 
TemplateArgumentListInfo TemplArgsWritten; bool HasTemplateArgumentsAsWritten = Record.readBool(); if (HasTemplateArgumentsAsWritten) Record.readTemplateArgumentListInfo(TemplArgsWritten); FD->setDependentTemplateSpecialization( Reader.getContext(), Candidates, HasTemplateArgumentsAsWritten ? &TemplArgsWritten : nullptr); // These are not merged; we don't need to merge redeclarations of dependent // template friends. break; } } VisitDeclaratorDecl(FD); // Attach a type to this function. Use the real type if possible, but fall // back to the type as written if it involves a deduced return type. if (FD->getTypeSourceInfo() && FD->getTypeSourceInfo() ->getType() ->castAs() ->getReturnType() ->getContainedAutoType()) { // We'll set up the real type in Visit, once we've finished loading the // function. FD->setType(FD->getTypeSourceInfo()->getType()); Reader.PendingDeducedFunctionTypes.push_back({FD, DeferredTypeID}); } else { FD->setType(Reader.GetType(DeferredTypeID)); } DeferredTypeID = 0; FD->DNLoc = Record.readDeclarationNameLoc(FD->getDeclName()); FD->IdentifierNamespace = Record.readInt(); // FunctionDecl's body is handled last at ASTDeclReader::Visit, // after everything else is read. BitsUnpacker FunctionDeclBits(Record.readInt()); FD->setCachedLinkage((Linkage)FunctionDeclBits.getNextBits(/*Width=*/3)); FD->setStorageClass((StorageClass)FunctionDeclBits.getNextBits(/*Width=*/3)); FD->setInlineSpecified(FunctionDeclBits.getNextBit()); FD->setImplicitlyInline(FunctionDeclBits.getNextBit()); FD->setHasSkippedBody(FunctionDeclBits.getNextBit()); FD->setVirtualAsWritten(FunctionDeclBits.getNextBit()); // We defer calling `FunctionDecl::setPure()` here as for methods of // `CXXTemplateSpecializationDecl`s, we may not have connected up the // definition (which is required for `setPure`). const bool Pure = FunctionDeclBits.getNextBit(); FD->setHasInheritedPrototype(FunctionDeclBits.getNextBit()); FD->setHasWrittenPrototype(FunctionDeclBits.getNextBit()); FD->setDeletedAsWritten(FunctionDeclBits.getNextBit()); FD->setTrivial(FunctionDeclBits.getNextBit()); FD->setTrivialForCall(FunctionDeclBits.getNextBit()); FD->setDefaulted(FunctionDeclBits.getNextBit()); FD->setExplicitlyDefaulted(FunctionDeclBits.getNextBit()); FD->setIneligibleOrNotSelected(FunctionDeclBits.getNextBit()); FD->setConstexprKind( (ConstexprSpecKind)FunctionDeclBits.getNextBits(/*Width=*/2)); FD->setHasImplicitReturnZero(FunctionDeclBits.getNextBit()); FD->setIsMultiVersion(FunctionDeclBits.getNextBit()); FD->setLateTemplateParsed(FunctionDeclBits.getNextBit()); FD->setFriendConstraintRefersToEnclosingTemplate( FunctionDeclBits.getNextBit()); FD->setUsesSEHTry(FunctionDeclBits.getNextBit()); FD->EndRangeLoc = readSourceLocation(); if (FD->isExplicitlyDefaulted()) FD->setDefaultLoc(readSourceLocation()); FD->ODRHash = Record.readInt(); FD->setHasODRHash(true); if (FD->isDefaulted() || FD->isDeletedAsWritten()) { // If 'Info' is nonzero, we need to read an DefaultedOrDeletedInfo; if, // additionally, the second bit is also set, we also need to read // a DeletedMessage for the DefaultedOrDeletedInfo. if (auto Info = Record.readInt()) { bool HasMessage = Info & 2; StringLiteral *DeletedMessage = HasMessage ? 
cast(Record.readExpr()) : nullptr; unsigned NumLookups = Record.readInt(); SmallVector Lookups; for (unsigned I = 0; I != NumLookups; ++I) { NamedDecl *ND = Record.readDeclAs(); AccessSpecifier AS = (AccessSpecifier)Record.readInt(); Lookups.push_back(DeclAccessPair::make(ND, AS)); } FD->setDefaultedOrDeletedInfo( FunctionDecl::DefaultedOrDeletedFunctionInfo::Create( Reader.getContext(), Lookups, DeletedMessage)); } } if (Existing) mergeRedeclarable(FD, Existing, Redecl); else if (auto Kind = FD->getTemplatedKind(); Kind == FunctionDecl::TK_FunctionTemplate || Kind == FunctionDecl::TK_FunctionTemplateSpecialization) { // Function Templates have their FunctionTemplateDecls merged instead of // their FunctionDecls. auto merge = [this, &Redecl, FD](auto &&F) { auto *Existing = cast_or_null(Redecl.getKnownMergeTarget()); RedeclarableResult NewRedecl(Existing ? F(Existing) : nullptr, Redecl.getFirstID(), Redecl.isKeyDecl()); mergeRedeclarableTemplate(F(FD), NewRedecl); }; if (Kind == FunctionDecl::TK_FunctionTemplate) merge( [](FunctionDecl *FD) { return FD->getDescribedFunctionTemplate(); }); else merge([](FunctionDecl *FD) { return FD->getTemplateSpecializationInfo()->getTemplate(); }); } else mergeRedeclarable(FD, Redecl); // Defer calling `setPure` until merging above has guaranteed we've set // `DefinitionData` (as this will need to access it). FD->setIsPureVirtual(Pure); // Read in the parameters. unsigned NumParams = Record.readInt(); SmallVector Params; Params.reserve(NumParams); for (unsigned I = 0; I != NumParams; ++I) Params.push_back(readDeclAs()); FD->setParams(Reader.getContext(), Params); } void ASTDeclReader::VisitObjCMethodDecl(ObjCMethodDecl *MD) { VisitNamedDecl(MD); if (Record.readInt()) { // Load the body on-demand. Most clients won't care, because method // definitions rarely show up in headers. 
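  // The statement below only records where the body lives in the bitstream;
  // nothing is deserialized until a client actually asks for it. A minimal
  // sketch of the same offset-based lazy loading in plain C++; the names
  // ExampleStream, PendingBodyOffsets and readStmtAt are hypothetical, not
  // reader API:
  //
  //   std::map<const ObjCMethodDecl *, uint64_t> PendingBodyOffsets;
  //
  //   void noteBody(const ObjCMethodDecl *MD, uint64_t CursorOffset) {
  //     PendingBodyOffsets[MD] = CursorOffset; // cheap bookkeeping only
  //   }
  //
  //   Stmt *loadBodyOnDemand(const ObjCMethodDecl *MD, ExampleStream &S) {
  //     auto It = PendingBodyOffsets.find(MD);
  //     if (It == PendingBodyOffsets.end())
  //       return nullptr;          // no body was serialized for MD
  //     S.seekTo(It->second);      // jump back to the saved record offset
  //     return readStmtAt(S);      // deserialize only now
  //   }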
Reader.PendingBodies[MD] = GetCurrentCursorOffset(); } MD->setSelfDecl(readDeclAs()); MD->setCmdDecl(readDeclAs()); MD->setInstanceMethod(Record.readInt()); MD->setVariadic(Record.readInt()); MD->setPropertyAccessor(Record.readInt()); MD->setSynthesizedAccessorStub(Record.readInt()); MD->setDefined(Record.readInt()); MD->setOverriding(Record.readInt()); MD->setHasSkippedBody(Record.readInt()); MD->setIsRedeclaration(Record.readInt()); MD->setHasRedeclaration(Record.readInt()); if (MD->hasRedeclaration()) Reader.getContext().setObjCMethodRedeclaration(MD, readDeclAs()); MD->setDeclImplementation( static_cast(Record.readInt())); MD->setObjCDeclQualifier((Decl::ObjCDeclQualifier)Record.readInt()); MD->setRelatedResultType(Record.readInt()); MD->setReturnType(Record.readType()); MD->setReturnTypeSourceInfo(readTypeSourceInfo()); MD->DeclEndLoc = readSourceLocation(); unsigned NumParams = Record.readInt(); SmallVector Params; Params.reserve(NumParams); for (unsigned I = 0; I != NumParams; ++I) Params.push_back(readDeclAs()); MD->setSelLocsKind((SelectorLocationsKind)Record.readInt()); unsigned NumStoredSelLocs = Record.readInt(); SmallVector SelLocs; SelLocs.reserve(NumStoredSelLocs); for (unsigned i = 0; i != NumStoredSelLocs; ++i) SelLocs.push_back(readSourceLocation()); MD->setParamsAndSelLocs(Reader.getContext(), Params, SelLocs); } void ASTDeclReader::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) { VisitTypedefNameDecl(D); D->Variance = Record.readInt(); D->Index = Record.readInt(); D->VarianceLoc = readSourceLocation(); D->ColonLoc = readSourceLocation(); } void ASTDeclReader::VisitObjCContainerDecl(ObjCContainerDecl *CD) { VisitNamedDecl(CD); CD->setAtStartLoc(readSourceLocation()); CD->setAtEndRange(readSourceRange()); } ObjCTypeParamList *ASTDeclReader::ReadObjCTypeParamList() { unsigned numParams = Record.readInt(); if (numParams == 0) return nullptr; SmallVector typeParams; typeParams.reserve(numParams); for (unsigned i = 0; i != numParams; ++i) { auto *typeParam = readDeclAs(); if (!typeParam) return nullptr; typeParams.push_back(typeParam); } SourceLocation lAngleLoc = readSourceLocation(); SourceLocation rAngleLoc = readSourceLocation(); return ObjCTypeParamList::create(Reader.getContext(), lAngleLoc, typeParams, rAngleLoc); } void ASTDeclReader::ReadObjCDefinitionData( struct ObjCInterfaceDecl::DefinitionData &Data) { // Read the superclass. Data.SuperClassTInfo = readTypeSourceInfo(); Data.EndLoc = readSourceLocation(); Data.HasDesignatedInitializers = Record.readInt(); Data.ODRHash = Record.readInt(); Data.HasODRHash = true; // Read the directly referenced protocols and their SourceLocations. unsigned NumProtocols = Record.readInt(); SmallVector Protocols; Protocols.reserve(NumProtocols); for (unsigned I = 0; I != NumProtocols; ++I) Protocols.push_back(readDeclAs()); SmallVector ProtoLocs; ProtoLocs.reserve(NumProtocols); for (unsigned I = 0; I != NumProtocols; ++I) ProtoLocs.push_back(readSourceLocation()); Data.ReferencedProtocols.set(Protocols.data(), NumProtocols, ProtoLocs.data(), Reader.getContext()); // Read the transitive closure of protocols referenced by this class. 
NumProtocols = Record.readInt(); Protocols.clear(); Protocols.reserve(NumProtocols); for (unsigned I = 0; I != NumProtocols; ++I) Protocols.push_back(readDeclAs()); Data.AllReferencedProtocols.set(Protocols.data(), NumProtocols, Reader.getContext()); } void ASTDeclReader::MergeDefinitionData(ObjCInterfaceDecl *D, struct ObjCInterfaceDecl::DefinitionData &&NewDD) { struct ObjCInterfaceDecl::DefinitionData &DD = D->data(); if (DD.Definition == NewDD.Definition) return; Reader.MergedDeclContexts.insert( std::make_pair(NewDD.Definition, DD.Definition)); Reader.mergeDefinitionVisibility(DD.Definition, NewDD.Definition); if (D->getODRHash() != NewDD.ODRHash) Reader.PendingObjCInterfaceOdrMergeFailures[DD.Definition].push_back( {NewDD.Definition, &NewDD}); } void ASTDeclReader::VisitObjCInterfaceDecl(ObjCInterfaceDecl *ID) { RedeclarableResult Redecl = VisitRedeclarable(ID); VisitObjCContainerDecl(ID); DeferredTypeID = Record.getGlobalTypeID(Record.readInt()); mergeRedeclarable(ID, Redecl); ID->TypeParamList = ReadObjCTypeParamList(); if (Record.readInt()) { // Read the definition. ID->allocateDefinitionData(); ReadObjCDefinitionData(ID->data()); ObjCInterfaceDecl *Canon = ID->getCanonicalDecl(); if (Canon->Data.getPointer()) { // If we already have a definition, keep the definition invariant and // merge the data. MergeDefinitionData(Canon, std::move(ID->data())); ID->Data = Canon->Data; } else { // Set the definition data of the canonical declaration, so other // redeclarations will see it. ID->getCanonicalDecl()->Data = ID->Data; // We will rebuild this list lazily. ID->setIvarList(nullptr); } // Note that we have deserialized a definition. Reader.PendingDefinitions.insert(ID); // Note that we've loaded this Objective-C class. Reader.ObjCClassesLoaded.push_back(ID); } else { ID->Data = ID->getCanonicalDecl()->Data; } } void ASTDeclReader::VisitObjCIvarDecl(ObjCIvarDecl *IVD) { VisitFieldDecl(IVD); IVD->setAccessControl((ObjCIvarDecl::AccessControl)Record.readInt()); // This field will be built lazily. IVD->setNextIvar(nullptr); bool synth = Record.readInt(); IVD->setSynthesize(synth); // Check ivar redeclaration. if (IVD->isInvalidDecl()) return; // Don't check ObjCInterfaceDecl as interfaces are named and mismatches can be // detected in VisitObjCInterfaceDecl. Here we are looking for redeclarations // in extensions. if (isa(IVD->getDeclContext())) return; ObjCInterfaceDecl *CanonIntf = IVD->getContainingInterface()->getCanonicalDecl(); IdentifierInfo *II = IVD->getIdentifier(); ObjCIvarDecl *PrevIvar = CanonIntf->lookupInstanceVariable(II); if (PrevIvar && PrevIvar != IVD) { auto *ParentExt = dyn_cast(IVD->getDeclContext()); auto *PrevParentExt = dyn_cast(PrevIvar->getDeclContext()); if (ParentExt && PrevParentExt) { // Postpone diagnostic as we should merge identical extensions from // different modules. Reader .PendingObjCExtensionIvarRedeclarations[std::make_pair(ParentExt, PrevParentExt)] .push_back(std::make_pair(IVD, PrevIvar)); } else if (ParentExt || PrevParentExt) { // Duplicate ivars in extension + implementation are never compatible. // Compatibility of implementation + implementation should be handled in // VisitObjCImplementationDecl. 
Reader.Diag(IVD->getLocation(), diag::err_duplicate_ivar_declaration) << II; Reader.Diag(PrevIvar->getLocation(), diag::note_previous_definition); } } } void ASTDeclReader::ReadObjCDefinitionData( struct ObjCProtocolDecl::DefinitionData &Data) { unsigned NumProtoRefs = Record.readInt(); SmallVector ProtoRefs; ProtoRefs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoRefs.push_back(readDeclAs()); SmallVector ProtoLocs; ProtoLocs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoLocs.push_back(readSourceLocation()); Data.ReferencedProtocols.set(ProtoRefs.data(), NumProtoRefs, ProtoLocs.data(), Reader.getContext()); Data.ODRHash = Record.readInt(); Data.HasODRHash = true; } void ASTDeclReader::MergeDefinitionData( ObjCProtocolDecl *D, struct ObjCProtocolDecl::DefinitionData &&NewDD) { struct ObjCProtocolDecl::DefinitionData &DD = D->data(); if (DD.Definition == NewDD.Definition) return; Reader.MergedDeclContexts.insert( std::make_pair(NewDD.Definition, DD.Definition)); Reader.mergeDefinitionVisibility(DD.Definition, NewDD.Definition); if (D->getODRHash() != NewDD.ODRHash) Reader.PendingObjCProtocolOdrMergeFailures[DD.Definition].push_back( {NewDD.Definition, &NewDD}); } void ASTDeclReader::VisitObjCProtocolDecl(ObjCProtocolDecl *PD) { RedeclarableResult Redecl = VisitRedeclarable(PD); VisitObjCContainerDecl(PD); mergeRedeclarable(PD, Redecl); if (Record.readInt()) { // Read the definition. PD->allocateDefinitionData(); ReadObjCDefinitionData(PD->data()); ObjCProtocolDecl *Canon = PD->getCanonicalDecl(); if (Canon->Data.getPointer()) { // If we already have a definition, keep the definition invariant and // merge the data. MergeDefinitionData(Canon, std::move(PD->data())); PD->Data = Canon->Data; } else { // Set the definition data of the canonical declaration, so other // redeclarations will see it. PD->getCanonicalDecl()->Data = PD->Data; } // Note that we have deserialized a definition. Reader.PendingDefinitions.insert(PD); } else { PD->Data = PD->getCanonicalDecl()->Data; } } void ASTDeclReader::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *FD) { VisitFieldDecl(FD); } void ASTDeclReader::VisitObjCCategoryDecl(ObjCCategoryDecl *CD) { VisitObjCContainerDecl(CD); CD->setCategoryNameLoc(readSourceLocation()); CD->setIvarLBraceLoc(readSourceLocation()); CD->setIvarRBraceLoc(readSourceLocation()); // Note that this category has been deserialized. We do this before // deserializing the interface declaration, so that it will consider this /// category. Reader.CategoriesDeserialized.insert(CD); CD->ClassInterface = readDeclAs(); CD->TypeParamList = ReadObjCTypeParamList(); unsigned NumProtoRefs = Record.readInt(); SmallVector ProtoRefs; ProtoRefs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoRefs.push_back(readDeclAs()); SmallVector ProtoLocs; ProtoLocs.reserve(NumProtoRefs); for (unsigned I = 0; I != NumProtoRefs; ++I) ProtoLocs.push_back(readSourceLocation()); CD->setProtocolList(ProtoRefs.data(), NumProtoRefs, ProtoLocs.data(), Reader.getContext()); // Protocols in the class extension belong to the class. 
if (NumProtoRefs > 0 && CD->ClassInterface && CD->IsClassExtension()) CD->ClassInterface->mergeClassExtensionProtocolList( (ObjCProtocolDecl *const *)ProtoRefs.data(), NumProtoRefs, Reader.getContext()); } void ASTDeclReader::VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *CAD) { VisitNamedDecl(CAD); CAD->setClassInterface(readDeclAs()); } void ASTDeclReader::VisitObjCPropertyDecl(ObjCPropertyDecl *D) { VisitNamedDecl(D); D->setAtLoc(readSourceLocation()); D->setLParenLoc(readSourceLocation()); QualType T = Record.readType(); TypeSourceInfo *TSI = readTypeSourceInfo(); D->setType(T, TSI); D->setPropertyAttributes((ObjCPropertyAttribute::Kind)Record.readInt()); D->setPropertyAttributesAsWritten( (ObjCPropertyAttribute::Kind)Record.readInt()); D->setPropertyImplementation( (ObjCPropertyDecl::PropertyControl)Record.readInt()); DeclarationName GetterName = Record.readDeclarationName(); SourceLocation GetterLoc = readSourceLocation(); D->setGetterName(GetterName.getObjCSelector(), GetterLoc); DeclarationName SetterName = Record.readDeclarationName(); SourceLocation SetterLoc = readSourceLocation(); D->setSetterName(SetterName.getObjCSelector(), SetterLoc); D->setGetterMethodDecl(readDeclAs()); D->setSetterMethodDecl(readDeclAs()); D->setPropertyIvarDecl(readDeclAs()); } void ASTDeclReader::VisitObjCImplDecl(ObjCImplDecl *D) { VisitObjCContainerDecl(D); D->setClassInterface(readDeclAs()); } void ASTDeclReader::VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D) { VisitObjCImplDecl(D); D->CategoryNameLoc = readSourceLocation(); } void ASTDeclReader::VisitObjCImplementationDecl(ObjCImplementationDecl *D) { VisitObjCImplDecl(D); D->setSuperClass(readDeclAs()); D->SuperLoc = readSourceLocation(); D->setIvarLBraceLoc(readSourceLocation()); D->setIvarRBraceLoc(readSourceLocation()); D->setHasNonZeroConstructors(Record.readInt()); D->setHasDestructors(Record.readInt()); D->NumIvarInitializers = Record.readInt(); if (D->NumIvarInitializers) D->IvarInitializers = ReadGlobalOffset(); } void ASTDeclReader::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) { VisitDecl(D); D->setAtLoc(readSourceLocation()); D->setPropertyDecl(readDeclAs()); D->PropertyIvarDecl = readDeclAs(); D->IvarLoc = readSourceLocation(); D->setGetterMethodDecl(readDeclAs()); D->setSetterMethodDecl(readDeclAs()); D->setGetterCXXConstructor(Record.readExpr()); D->setSetterCXXAssignment(Record.readExpr()); } void ASTDeclReader::VisitFieldDecl(FieldDecl *FD) { VisitDeclaratorDecl(FD); FD->Mutable = Record.readInt(); unsigned Bits = Record.readInt(); FD->StorageKind = Bits >> 1; if (FD->StorageKind == FieldDecl::ISK_CapturedVLAType) FD->CapturedVLAType = cast(Record.readType().getTypePtr()); else if (Bits & 1) FD->setBitWidth(Record.readExpr()); if (!FD->getDeclName()) { if (auto *Tmpl = readDeclAs()) Reader.getContext().setInstantiatedFromUnnamedFieldDecl(FD, Tmpl); } mergeMergeable(FD); } void ASTDeclReader::VisitMSPropertyDecl(MSPropertyDecl *PD) { VisitDeclaratorDecl(PD); PD->GetterId = Record.readIdentifier(); PD->SetterId = Record.readIdentifier(); } void ASTDeclReader::VisitMSGuidDecl(MSGuidDecl *D) { VisitValueDecl(D); D->PartVal.Part1 = Record.readInt(); D->PartVal.Part2 = Record.readInt(); D->PartVal.Part3 = Record.readInt(); for (auto &C : D->PartVal.Part4And5) C = Record.readInt(); // Add this GUID to the AST context's lookup structure, and merge if needed. 
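  // GetOrInsertNode below either registers this GUID or returns an
  // already-deserialized declaration with the same value, in which case the
  // new decl is merged into it. A minimal sketch of that uniquing pattern
  // with llvm::FoldingSet (ExampleNode is a hypothetical type, not part of
  // the reader):
  //
  //   struct ExampleNode : llvm::FoldingSetNode {
  //     unsigned Value;
  //     explicit ExampleNode(unsigned V) : Value(V) {}
  //     void Profile(llvm::FoldingSetNodeID &ID) { ID.AddInteger(Value); }
  //   };
  //
  //   llvm::FoldingSet<ExampleNode> Uniqued;
  //   auto *N = new ExampleNode(42);
  //   if (ExampleNode *Existing = Uniqued.GetOrInsertNode(N); Existing != N) {
  //     // An equivalent node already existed; treat N as a redeclaration of
  //     // Existing rather than a second canonical object.
  //   }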
if (MSGuidDecl *Existing = Reader.getContext().MSGuidDecls.GetOrInsertNode(D)) Reader.getContext().setPrimaryMergedDecl(D, Existing->getCanonicalDecl()); } void ASTDeclReader::VisitUnnamedGlobalConstantDecl( UnnamedGlobalConstantDecl *D) { VisitValueDecl(D); D->Value = Record.readAPValue(); // Add this to the AST context's lookup structure, and merge if needed. if (UnnamedGlobalConstantDecl *Existing = Reader.getContext().UnnamedGlobalConstantDecls.GetOrInsertNode(D)) Reader.getContext().setPrimaryMergedDecl(D, Existing->getCanonicalDecl()); } void ASTDeclReader::VisitTemplateParamObjectDecl(TemplateParamObjectDecl *D) { VisitValueDecl(D); D->Value = Record.readAPValue(); // Add this template parameter object to the AST context's lookup structure, // and merge if needed. if (TemplateParamObjectDecl *Existing = Reader.getContext().TemplateParamObjectDecls.GetOrInsertNode(D)) Reader.getContext().setPrimaryMergedDecl(D, Existing->getCanonicalDecl()); } void ASTDeclReader::VisitIndirectFieldDecl(IndirectFieldDecl *FD) { VisitValueDecl(FD); FD->ChainingSize = Record.readInt(); assert(FD->ChainingSize >= 2 && "Anonymous chaining must be >= 2"); FD->Chaining = new (Reader.getContext())NamedDecl*[FD->ChainingSize]; for (unsigned I = 0; I != FD->ChainingSize; ++I) FD->Chaining[I] = readDeclAs(); mergeMergeable(FD); } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarDeclImpl(VarDecl *VD) { RedeclarableResult Redecl = VisitRedeclarable(VD); VisitDeclaratorDecl(VD); BitsUnpacker VarDeclBits(Record.readInt()); auto VarLinkage = Linkage(VarDeclBits.getNextBits(/*Width=*/3)); bool DefGeneratedInModule = VarDeclBits.getNextBit(); VD->VarDeclBits.SClass = (StorageClass)VarDeclBits.getNextBits(/*Width=*/3); VD->VarDeclBits.TSCSpec = VarDeclBits.getNextBits(/*Width=*/2); VD->VarDeclBits.InitStyle = VarDeclBits.getNextBits(/*Width=*/2); VD->VarDeclBits.ARCPseudoStrong = VarDeclBits.getNextBit(); bool HasDeducedType = false; if (!isa(VD)) { VD->NonParmVarDeclBits.IsThisDeclarationADemotedDefinition = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.ExceptionVar = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.NRVOVariable = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.CXXForRangeDecl = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.IsInline = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.IsInlineSpecified = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.IsConstexpr = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.IsInitCapture = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.PreviousDeclInSameBlockScope = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.EscapingByref = VarDeclBits.getNextBit(); HasDeducedType = VarDeclBits.getNextBit(); VD->NonParmVarDeclBits.ImplicitParamKind = VarDeclBits.getNextBits(/*Width*/ 3); VD->NonParmVarDeclBits.ObjCForDecl = VarDeclBits.getNextBit(); } // If this variable has a deduced type, defer reading that type until we are // done deserializing this variable, because the type might refer back to the // variable. if (HasDeducedType) Reader.PendingDeducedVarTypes.push_back({VD, DeferredTypeID}); else VD->setType(Reader.GetType(DeferredTypeID)); DeferredTypeID = 0; VD->setCachedLinkage(VarLinkage); // Reconstruct the one piece of the IdentifierNamespace that we need. 
if (VD->getStorageClass() == SC_Extern && VarLinkage != Linkage::None && VD->getLexicalDeclContext()->isFunctionOrMethod()) VD->setLocalExternDecl(); if (DefGeneratedInModule) { Reader.DefinitionSource[VD] = Loc.F->Kind == ModuleKind::MK_MainFile || Reader.getContext().getLangOpts().BuildingPCHWithObjectFile; } if (VD->hasAttr()) { Expr *CopyExpr = Record.readExpr(); if (CopyExpr) Reader.getContext().setBlockVarCopyInit(VD, CopyExpr, Record.readInt()); } enum VarKind { VarNotTemplate = 0, VarTemplate, StaticDataMemberSpecialization }; switch ((VarKind)Record.readInt()) { case VarNotTemplate: // Only true variables (not parameters or implicit parameters) can be // merged; the other kinds are not really redeclarable at all. if (!isa(VD) && !isa(VD) && !isa(VD)) mergeRedeclarable(VD, Redecl); break; case VarTemplate: // Merged when we merge the template. VD->setDescribedVarTemplate(readDeclAs()); break; case StaticDataMemberSpecialization: { // HasMemberSpecializationInfo. auto *Tmpl = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); Reader.getContext().setInstantiatedFromStaticDataMember(VD, Tmpl, TSK,POI); mergeRedeclarable(VD, Redecl); break; } } return Redecl; } void ASTDeclReader::ReadVarDeclInit(VarDecl *VD) { if (uint64_t Val = Record.readInt()) { EvaluatedStmt *Eval = VD->ensureEvaluatedStmt(); Eval->HasConstantInitialization = (Val & 2) != 0; Eval->HasConstantDestruction = (Val & 4) != 0; Eval->WasEvaluated = (Val & 8) != 0; if (Eval->WasEvaluated) { Eval->Evaluated = Record.readAPValue(); if (Eval->Evaluated.needsCleanup()) Reader.getContext().addDestruction(&Eval->Evaluated); } // Store the offset of the initializer. Don't deserialize it yet: it might // not be needed, and might refer back to the variable, for example if it // contains a lambda. Eval->Value = GetCurrentCursorOffset(); } } void ASTDeclReader::VisitImplicitParamDecl(ImplicitParamDecl *PD) { VisitVarDecl(PD); } void ASTDeclReader::VisitParmVarDecl(ParmVarDecl *PD) { VisitVarDecl(PD); unsigned scopeIndex = Record.readInt(); BitsUnpacker ParmVarDeclBits(Record.readInt()); unsigned isObjCMethodParam = ParmVarDeclBits.getNextBit(); unsigned scopeDepth = ParmVarDeclBits.getNextBits(/*Width=*/7); unsigned declQualifier = ParmVarDeclBits.getNextBits(/*Width=*/7); if (isObjCMethodParam) { assert(scopeDepth == 0); PD->setObjCMethodScopeInfo(scopeIndex); PD->ParmVarDeclBits.ScopeDepthOrObjCQuals = declQualifier; } else { PD->setScopeInfo(scopeDepth, scopeIndex); } PD->ParmVarDeclBits.IsKNRPromoted = ParmVarDeclBits.getNextBit(); PD->ParmVarDeclBits.HasInheritedDefaultArg = ParmVarDeclBits.getNextBit(); if (ParmVarDeclBits.getNextBit()) // hasUninstantiatedDefaultArg. PD->setUninstantiatedDefaultArg(Record.readExpr()); if (ParmVarDeclBits.getNextBit()) // Valid explicit object parameter PD->ExplicitObjectParameterIntroducerLoc = Record.readSourceLocation(); // FIXME: If this is a redeclaration of a function from another module, handle // inheritance of default arguments. 
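  // Several records read above pack multiple boolean flags into a single
  // integer; ReadVarDeclInit, for instance, treats a nonzero value as "an
  // initializer is present" and then decodes bits 2, 4 and 8 as constant
  // initialization, constant destruction and "already evaluated". A minimal
  // sketch of that encode/decode convention in plain C++ (the helper names
  // are hypothetical):
  //
  //   uint64_t encodeInitFlags(bool HasInit, bool ConstInit, bool ConstDtor,
  //                            bool Evaluated) {
  //     uint64_t Val = HasInit ? 1 : 0;
  //     if (ConstInit) Val |= 2;
  //     if (ConstDtor) Val |= 4;
  //     if (Evaluated) Val |= 8;
  //     return Val;
  //   }
  //
  //   void decodeInitFlags(uint64_t Val, bool &ConstInit, bool &ConstDtor,
  //                        bool &Evaluated) {
  //     ConstInit = (Val & 2) != 0;
  //     ConstDtor = (Val & 4) != 0;
  //     Evaluated = (Val & 8) != 0;
  //   }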
} void ASTDeclReader::VisitDecompositionDecl(DecompositionDecl *DD) { VisitVarDecl(DD); auto **BDs = DD->getTrailingObjects(); for (unsigned I = 0; I != DD->NumBindings; ++I) { BDs[I] = readDeclAs(); BDs[I]->setDecomposedDecl(DD); } } void ASTDeclReader::VisitBindingDecl(BindingDecl *BD) { VisitValueDecl(BD); BD->Binding = Record.readExpr(); } void ASTDeclReader::VisitFileScopeAsmDecl(FileScopeAsmDecl *AD) { VisitDecl(AD); AD->setAsmString(cast(Record.readExpr())); AD->setRParenLoc(readSourceLocation()); } void ASTDeclReader::VisitTopLevelStmtDecl(TopLevelStmtDecl *D) { VisitDecl(D); D->Statement = Record.readStmt(); } void ASTDeclReader::VisitBlockDecl(BlockDecl *BD) { VisitDecl(BD); BD->setBody(cast_or_null(Record.readStmt())); BD->setSignatureAsWritten(readTypeSourceInfo()); unsigned NumParams = Record.readInt(); SmallVector Params; Params.reserve(NumParams); for (unsigned I = 0; I != NumParams; ++I) Params.push_back(readDeclAs()); BD->setParams(Params); BD->setIsVariadic(Record.readInt()); BD->setBlockMissingReturnType(Record.readInt()); BD->setIsConversionFromLambda(Record.readInt()); BD->setDoesNotEscape(Record.readInt()); BD->setCanAvoidCopyToHeap(Record.readInt()); bool capturesCXXThis = Record.readInt(); unsigned numCaptures = Record.readInt(); SmallVector captures; captures.reserve(numCaptures); for (unsigned i = 0; i != numCaptures; ++i) { auto *decl = readDeclAs(); unsigned flags = Record.readInt(); bool byRef = (flags & 1); bool nested = (flags & 2); Expr *copyExpr = ((flags & 4) ? Record.readExpr() : nullptr); captures.push_back(BlockDecl::Capture(decl, byRef, nested, copyExpr)); } BD->setCaptures(Reader.getContext(), captures, capturesCXXThis); } void ASTDeclReader::VisitCapturedDecl(CapturedDecl *CD) { VisitDecl(CD); unsigned ContextParamPos = Record.readInt(); CD->setNothrow(Record.readInt() != 0); // Body is set by VisitCapturedStmt. for (unsigned I = 0; I < CD->NumParams; ++I) { if (I != ContextParamPos) CD->setParam(I, readDeclAs()); else CD->setContextParam(I, readDeclAs()); } } void ASTDeclReader::VisitLinkageSpecDecl(LinkageSpecDecl *D) { VisitDecl(D); D->setLanguage(static_cast(Record.readInt())); D->setExternLoc(readSourceLocation()); D->setRBraceLoc(readSourceLocation()); } void ASTDeclReader::VisitExportDecl(ExportDecl *D) { VisitDecl(D); D->RBraceLoc = readSourceLocation(); } void ASTDeclReader::VisitLabelDecl(LabelDecl *D) { VisitNamedDecl(D); D->setLocStart(readSourceLocation()); } void ASTDeclReader::VisitNamespaceDecl(NamespaceDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); VisitNamedDecl(D); BitsUnpacker NamespaceDeclBits(Record.readInt()); D->setInline(NamespaceDeclBits.getNextBit()); D->setNested(NamespaceDeclBits.getNextBit()); D->LocStart = readSourceLocation(); D->RBraceLoc = readSourceLocation(); // Defer loading the anonymous namespace until we've finished merging // this namespace; loading it might load a later declaration of the // same namespace, and we have an invariant that older declarations // get merged before newer ones try to merge. GlobalDeclID AnonNamespace; if (Redecl.getFirstID() == ThisDeclID) AnonNamespace = readDeclID(); mergeRedeclarable(D, Redecl); if (AnonNamespace.isValid()) { // Each module has its own anonymous namespace, which is disjoint from // any other module's anonymous namespaces, so don't attach the anonymous // namespace at all. 
auto *Anon = cast(Reader.GetDecl(AnonNamespace)); if (!Record.isModule()) D->setAnonymousNamespace(Anon); } } void ASTDeclReader::VisitHLSLBufferDecl(HLSLBufferDecl *D) { VisitNamedDecl(D); VisitDeclContext(D); D->IsCBuffer = Record.readBool(); D->KwLoc = readSourceLocation(); D->LBraceLoc = readSourceLocation(); D->RBraceLoc = readSourceLocation(); } void ASTDeclReader::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); VisitNamedDecl(D); D->NamespaceLoc = readSourceLocation(); D->IdentLoc = readSourceLocation(); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->Namespace = readDeclAs(); mergeRedeclarable(D, Redecl); } void ASTDeclReader::VisitUsingDecl(UsingDecl *D) { VisitNamedDecl(D); D->setUsingLoc(readSourceLocation()); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->DNLoc = Record.readDeclarationNameLoc(D->getDeclName()); D->FirstUsingShadow.setPointer(readDeclAs()); D->setTypename(Record.readInt()); if (auto *Pattern = readDeclAs()) Reader.getContext().setInstantiatedFromUsingDecl(D, Pattern); mergeMergeable(D); } void ASTDeclReader::VisitUsingEnumDecl(UsingEnumDecl *D) { VisitNamedDecl(D); D->setUsingLoc(readSourceLocation()); D->setEnumLoc(readSourceLocation()); D->setEnumType(Record.readTypeSourceInfo()); D->FirstUsingShadow.setPointer(readDeclAs()); if (auto *Pattern = readDeclAs()) Reader.getContext().setInstantiatedFromUsingEnumDecl(D, Pattern); mergeMergeable(D); } void ASTDeclReader::VisitUsingPackDecl(UsingPackDecl *D) { VisitNamedDecl(D); D->InstantiatedFrom = readDeclAs(); auto **Expansions = D->getTrailingObjects(); for (unsigned I = 0; I != D->NumExpansions; ++I) Expansions[I] = readDeclAs(); mergeMergeable(D); } void ASTDeclReader::VisitUsingShadowDecl(UsingShadowDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); VisitNamedDecl(D); D->Underlying = readDeclAs(); D->IdentifierNamespace = Record.readInt(); D->UsingOrNextShadow = readDeclAs(); auto *Pattern = readDeclAs(); if (Pattern) Reader.getContext().setInstantiatedFromUsingShadowDecl(D, Pattern); mergeRedeclarable(D, Redecl); } void ASTDeclReader::VisitConstructorUsingShadowDecl( ConstructorUsingShadowDecl *D) { VisitUsingShadowDecl(D); D->NominatedBaseClassShadowDecl = readDeclAs(); D->ConstructedBaseClassShadowDecl = readDeclAs(); D->IsVirtual = Record.readInt(); } void ASTDeclReader::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) { VisitNamedDecl(D); D->UsingLoc = readSourceLocation(); D->NamespaceLoc = readSourceLocation(); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->NominatedNamespace = readDeclAs(); D->CommonAncestor = readDeclAs(); } void ASTDeclReader::VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D) { VisitValueDecl(D); D->setUsingLoc(readSourceLocation()); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->DNLoc = Record.readDeclarationNameLoc(D->getDeclName()); D->EllipsisLoc = readSourceLocation(); mergeMergeable(D); } void ASTDeclReader::VisitUnresolvedUsingTypenameDecl( UnresolvedUsingTypenameDecl *D) { VisitTypeDecl(D); D->TypenameLocation = readSourceLocation(); D->QualifierLoc = Record.readNestedNameSpecifierLoc(); D->EllipsisLoc = readSourceLocation(); mergeMergeable(D); } void ASTDeclReader::VisitUnresolvedUsingIfExistsDecl( UnresolvedUsingIfExistsDecl *D) { VisitNamedDecl(D); } void ASTDeclReader::ReadCXXDefinitionData( struct CXXRecordDecl::DefinitionData &Data, const CXXRecordDecl *D, Decl *LambdaContext, unsigned IndexInLambdaContext) { BitsUnpacker CXXRecordDeclBits = Record.readInt(); 
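  // The FIELD macro below extracts fixed-width bit-fields from
  // CXXRecordDeclBits and, whenever the current 64-bit value has too few
  // bits left for the next field, refills the unpacker from the next integer
  // in the record (that is what canGetNextNBits/updateValue express). A
  // minimal sketch of such a refillable unpacker in plain C++
  // (ExampleBitsUnpacker is illustrative, not the real BitsUnpacker):
  //
  //   class ExampleBitsUnpacker {
  //     uint64_t Value;
  //     unsigned Used = 0;
  //   public:
  //     explicit ExampleBitsUnpacker(uint64_t V) : Value(V) {}
  //     bool canGetNextNBits(unsigned W) const { return Used + W <= 64; }
  //     void updateValue(uint64_t V) { Value = V; Used = 0; }
  //     uint64_t getNextBits(unsigned W) { // assumes W < 64
  //       uint64_t R = (Value >> Used) & ((uint64_t(1) << W) - 1);
  //       Used += W;
  //       return R;
  //     }
  //     bool getNextBit() { return getNextBits(1); }
  //   };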
#define FIELD(Name, Width, Merge) \ if (!CXXRecordDeclBits.canGetNextNBits(Width)) \ CXXRecordDeclBits.updateValue(Record.readInt()); \ Data.Name = CXXRecordDeclBits.getNextBits(Width); #include "clang/AST/CXXRecordDeclDefinitionBits.def" #undef FIELD // Note: the caller has deserialized the IsLambda bit already. Data.ODRHash = Record.readInt(); Data.HasODRHash = true; if (Record.readInt()) { Reader.DefinitionSource[D] = Loc.F->Kind == ModuleKind::MK_MainFile || Reader.getContext().getLangOpts().BuildingPCHWithObjectFile; } Record.readUnresolvedSet(Data.Conversions); Data.ComputedVisibleConversions = Record.readInt(); if (Data.ComputedVisibleConversions) Record.readUnresolvedSet(Data.VisibleConversions); assert(Data.Definition && "Data.Definition should be already set!"); if (!Data.IsLambda) { assert(!LambdaContext && !IndexInLambdaContext && "given lambda context for non-lambda"); Data.NumBases = Record.readInt(); if (Data.NumBases) Data.Bases = ReadGlobalOffset(); Data.NumVBases = Record.readInt(); if (Data.NumVBases) Data.VBases = ReadGlobalOffset(); Data.FirstFriend = readDeclID().getRawValue(); } else { using Capture = LambdaCapture; auto &Lambda = static_cast(Data); BitsUnpacker LambdaBits(Record.readInt()); Lambda.DependencyKind = LambdaBits.getNextBits(/*Width=*/2); Lambda.IsGenericLambda = LambdaBits.getNextBit(); Lambda.CaptureDefault = LambdaBits.getNextBits(/*Width=*/2); Lambda.NumCaptures = LambdaBits.getNextBits(/*Width=*/15); Lambda.HasKnownInternalLinkage = LambdaBits.getNextBit(); Lambda.NumExplicitCaptures = Record.readInt(); Lambda.ManglingNumber = Record.readInt(); if (unsigned DeviceManglingNumber = Record.readInt()) Reader.getContext().DeviceLambdaManglingNumbers[D] = DeviceManglingNumber; Lambda.IndexInContext = IndexInLambdaContext; Lambda.ContextDecl = LambdaContext; Capture *ToCapture = nullptr; if (Lambda.NumCaptures) { ToCapture = (Capture *)Reader.getContext().Allocate(sizeof(Capture) * Lambda.NumCaptures); Lambda.AddCaptureList(Reader.getContext(), ToCapture); } Lambda.MethodTyInfo = readTypeSourceInfo(); for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) { SourceLocation Loc = readSourceLocation(); BitsUnpacker CaptureBits(Record.readInt()); bool IsImplicit = CaptureBits.getNextBit(); auto Kind = static_cast(CaptureBits.getNextBits(/*Width=*/3)); switch (Kind) { case LCK_StarThis: case LCK_This: case LCK_VLAType: new (ToCapture) Capture(Loc, IsImplicit, Kind, nullptr, SourceLocation()); ToCapture++; break; case LCK_ByCopy: case LCK_ByRef: auto *Var = readDeclAs(); SourceLocation EllipsisLoc = readSourceLocation(); new (ToCapture) Capture(Loc, IsImplicit, Kind, Var, EllipsisLoc); ToCapture++; break; } } } } void ASTDeclReader::MergeDefinitionData( CXXRecordDecl *D, struct CXXRecordDecl::DefinitionData &&MergeDD) { assert(D->DefinitionData && "merging class definition into non-definition"); auto &DD = *D->DefinitionData; if (DD.Definition != MergeDD.Definition) { // Track that we merged the definitions. 
Reader.MergedDeclContexts.insert(std::make_pair(MergeDD.Definition, DD.Definition)); Reader.PendingDefinitions.erase(MergeDD.Definition); MergeDD.Definition->setCompleteDefinition(false); Reader.mergeDefinitionVisibility(DD.Definition, MergeDD.Definition); assert(!Reader.Lookups.contains(MergeDD.Definition) && "already loaded pending lookups for merged definition"); } auto PFDI = Reader.PendingFakeDefinitionData.find(&DD); if (PFDI != Reader.PendingFakeDefinitionData.end() && PFDI->second == ASTReader::PendingFakeDefinitionKind::Fake) { // We faked up this definition data because we found a class for which we'd // not yet loaded the definition. Replace it with the real thing now. assert(!DD.IsLambda && !MergeDD.IsLambda && "faked up lambda definition?"); PFDI->second = ASTReader::PendingFakeDefinitionKind::FakeLoaded; // Don't change which declaration is the definition; that is required // to be invariant once we select it. auto *Def = DD.Definition; DD = std::move(MergeDD); DD.Definition = Def; return; } bool DetectedOdrViolation = false; #define FIELD(Name, Width, Merge) Merge(Name) #define MERGE_OR(Field) DD.Field |= MergeDD.Field; #define NO_MERGE(Field) \ DetectedOdrViolation |= DD.Field != MergeDD.Field; \ MERGE_OR(Field) #include "clang/AST/CXXRecordDeclDefinitionBits.def" NO_MERGE(IsLambda) #undef NO_MERGE #undef MERGE_OR if (DD.NumBases != MergeDD.NumBases || DD.NumVBases != MergeDD.NumVBases) DetectedOdrViolation = true; // FIXME: Issue a diagnostic if the base classes don't match when we come // to lazily load them. // FIXME: Issue a diagnostic if the list of conversion functions doesn't // match when we come to lazily load them. if (MergeDD.ComputedVisibleConversions && !DD.ComputedVisibleConversions) { DD.VisibleConversions = std::move(MergeDD.VisibleConversions); DD.ComputedVisibleConversions = true; } // FIXME: Issue a diagnostic if FirstFriend doesn't match when we come to // lazily load it. if (DD.IsLambda) { auto &Lambda1 = static_cast(DD); auto &Lambda2 = static_cast(MergeDD); DetectedOdrViolation |= Lambda1.DependencyKind != Lambda2.DependencyKind; DetectedOdrViolation |= Lambda1.IsGenericLambda != Lambda2.IsGenericLambda; DetectedOdrViolation |= Lambda1.CaptureDefault != Lambda2.CaptureDefault; DetectedOdrViolation |= Lambda1.NumCaptures != Lambda2.NumCaptures; DetectedOdrViolation |= Lambda1.NumExplicitCaptures != Lambda2.NumExplicitCaptures; DetectedOdrViolation |= Lambda1.HasKnownInternalLinkage != Lambda2.HasKnownInternalLinkage; DetectedOdrViolation |= Lambda1.ManglingNumber != Lambda2.ManglingNumber; if (Lambda1.NumCaptures && Lambda1.NumCaptures == Lambda2.NumCaptures) { for (unsigned I = 0, N = Lambda1.NumCaptures; I != N; ++I) { LambdaCapture &Cap1 = Lambda1.Captures.front()[I]; LambdaCapture &Cap2 = Lambda2.Captures.front()[I]; DetectedOdrViolation |= Cap1.getCaptureKind() != Cap2.getCaptureKind(); } Lambda1.AddCaptureList(Reader.getContext(), Lambda2.Captures.front()); } } // We don't want to check ODR for decls in the global module fragment. 
if (shouldSkipCheckingODR(MergeDD.Definition) || shouldSkipCheckingODR(D)) return; if (D->getODRHash() != MergeDD.ODRHash) { DetectedOdrViolation = true; } if (DetectedOdrViolation) Reader.PendingOdrMergeFailures[DD.Definition].push_back( {MergeDD.Definition, &MergeDD}); } void ASTDeclReader::ReadCXXRecordDefinition(CXXRecordDecl *D, bool Update, Decl *LambdaContext, unsigned IndexInLambdaContext) { struct CXXRecordDecl::DefinitionData *DD; ASTContext &C = Reader.getContext(); // Determine whether this is a lambda closure type, so that we can // allocate the appropriate DefinitionData structure. bool IsLambda = Record.readInt(); assert(!(IsLambda && Update) && "lambda definition should not be added by update record"); if (IsLambda) DD = new (C) CXXRecordDecl::LambdaDefinitionData( D, nullptr, CXXRecordDecl::LDK_Unknown, false, LCD_None); else DD = new (C) struct CXXRecordDecl::DefinitionData(D); CXXRecordDecl *Canon = D->getCanonicalDecl(); // Set decl definition data before reading it, so that during deserialization // when we read CXXRecordDecl, it already has definition data and we don't // set fake one. if (!Canon->DefinitionData) Canon->DefinitionData = DD; D->DefinitionData = Canon->DefinitionData; ReadCXXDefinitionData(*DD, D, LambdaContext, IndexInLambdaContext); // We might already have a different definition for this record. This can // happen either because we're reading an update record, or because we've // already done some merging. Either way, just merge into it. if (Canon->DefinitionData != DD) { MergeDefinitionData(Canon, std::move(*DD)); return; } // Mark this declaration as being a definition. D->setCompleteDefinition(true); // If this is not the first declaration or is an update record, we can have // other redeclarations already. Make a note that we need to propagate the // DefinitionData pointer onto them. if (Update || Canon != D) Reader.PendingDefinitions.insert(D); } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitCXXRecordDeclImpl(CXXRecordDecl *D) { RedeclarableResult Redecl = VisitRecordDeclImpl(D); ASTContext &C = Reader.getContext(); enum CXXRecKind { CXXRecNotTemplate = 0, CXXRecTemplate, CXXRecMemberSpecialization, CXXLambda }; Decl *LambdaContext = nullptr; unsigned IndexInLambdaContext = 0; switch ((CXXRecKind)Record.readInt()) { case CXXRecNotTemplate: // Merged when we merge the folding set entry in the primary template. if (!isa(D)) mergeRedeclarable(D, Redecl); break; case CXXRecTemplate: { // Merged when we merge the template. auto *Template = readDeclAs(); D->TemplateOrInstantiation = Template; if (!Template->getTemplatedDecl()) { // We've not actually loaded the ClassTemplateDecl yet, because we're // currently being loaded as its pattern. Rely on it to set up our // TypeForDecl (see VisitClassTemplateDecl). // // Beware: we do not yet know our canonical declaration, and may still // get merged once the surrounding class template has got off the ground. 
DeferredTypeID = 0; } break; } case CXXRecMemberSpecialization: { auto *RD = readDeclAs(); auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); MemberSpecializationInfo *MSI = new (C) MemberSpecializationInfo(RD, TSK); MSI->setPointOfInstantiation(POI); D->TemplateOrInstantiation = MSI; mergeRedeclarable(D, Redecl); break; } case CXXLambda: { LambdaContext = readDecl(); if (LambdaContext) IndexInLambdaContext = Record.readInt(); mergeLambda(D, Redecl, LambdaContext, IndexInLambdaContext); break; } } bool WasDefinition = Record.readInt(); if (WasDefinition) ReadCXXRecordDefinition(D, /*Update=*/false, LambdaContext, IndexInLambdaContext); else // Propagate DefinitionData pointer from the canonical declaration. D->DefinitionData = D->getCanonicalDecl()->DefinitionData; // Lazily load the key function to avoid deserializing every method so we can // compute it. if (WasDefinition) { GlobalDeclID KeyFn = readDeclID(); if (KeyFn.isValid() && D->isCompleteDefinition()) // FIXME: This is wrong for the ARM ABI, where some other module may have // made this function no longer be a key function. We need an update // record or similar for that case. C.KeyFunctions[D] = KeyFn.getRawValue(); } return Redecl; } void ASTDeclReader::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) { D->setExplicitSpecifier(Record.readExplicitSpec()); D->Ctor = readDeclAs(); VisitFunctionDecl(D); D->setDeductionCandidateKind( static_cast(Record.readInt())); } void ASTDeclReader::VisitCXXMethodDecl(CXXMethodDecl *D) { VisitFunctionDecl(D); unsigned NumOverridenMethods = Record.readInt(); if (D->isCanonicalDecl()) { while (NumOverridenMethods--) { // Avoid invariant checking of CXXMethodDecl::addOverriddenMethod, // MD may be initializing. if (auto *MD = readDeclAs()) Reader.getContext().addOverriddenMethod(D, MD->getCanonicalDecl()); } } else { // We don't care about which declarations this used to override; we get // the relevant information from the canonical declaration. Record.skipInts(NumOverridenMethods); } } void ASTDeclReader::VisitCXXConstructorDecl(CXXConstructorDecl *D) { // We need the inherited constructor information to merge the declaration, // so we have to read it before we call VisitCXXMethodDecl. D->setExplicitSpecifier(Record.readExplicitSpec()); if (D->isInheritingConstructor()) { auto *Shadow = readDeclAs(); auto *Ctor = readDeclAs(); *D->getTrailingObjects() = InheritedConstructor(Shadow, Ctor); } VisitCXXMethodDecl(D); } void ASTDeclReader::VisitCXXDestructorDecl(CXXDestructorDecl *D) { VisitCXXMethodDecl(D); if (auto *OperatorDelete = readDeclAs()) { CXXDestructorDecl *Canon = D->getCanonicalDecl(); auto *ThisArg = Record.readExpr(); // FIXME: Check consistency if we have an old and new operator delete. if (!Canon->OperatorDelete) { Canon->OperatorDelete = OperatorDelete; Canon->OperatorDeleteThisArg = ThisArg; } } } void ASTDeclReader::VisitCXXConversionDecl(CXXConversionDecl *D) { D->setExplicitSpecifier(Record.readExplicitSpec()); VisitCXXMethodDecl(D); } void ASTDeclReader::VisitImportDecl(ImportDecl *D) { VisitDecl(D); D->ImportedModule = readModule(); D->setImportComplete(Record.readInt()); auto *StoredLocs = D->getTrailingObjects(); for (unsigned I = 0, N = Record.back(); I != N; ++I) StoredLocs[I] = readSourceLocation(); Record.skipInts(1); // The number of stored source locations. 
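  // ImportDecl stores the identifier source locations read above as trailing
  // objects: the array lives directly behind the ImportDecl allocation, and
  // the count sits at the tail of the record (hence Record.back() above and
  // the skipInts(1) here) so it can be consulted when the object is sized.
  // A minimal sketch of the trailing-object layout with llvm::TrailingObjects
  // (ExampleDecl is hypothetical):
  //
  //   class ExampleDecl final
  //       : private llvm::TrailingObjects<ExampleDecl, SourceLocation> {
  //     friend TrailingObjects;
  //     unsigned NumLocs;
  //     explicit ExampleDecl(unsigned N) : NumLocs(N) {}
  //   public:
  //     static ExampleDecl *Create(ASTContext &C, unsigned N) {
  //       void *Mem = C.Allocate(totalSizeToAlloc<SourceLocation>(N));
  //       return new (Mem) ExampleDecl(N);
  //     }
  //     MutableArrayRef<SourceLocation> locs() {
  //       return {getTrailingObjects<SourceLocation>(), NumLocs};
  //     }
  //   };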
} void ASTDeclReader::VisitAccessSpecDecl(AccessSpecDecl *D) { VisitDecl(D); D->setColonLoc(readSourceLocation()); } void ASTDeclReader::VisitFriendDecl(FriendDecl *D) { VisitDecl(D); if (Record.readInt()) // hasFriendDecl D->Friend = readDeclAs(); else D->Friend = readTypeSourceInfo(); for (unsigned i = 0; i != D->NumTPLists; ++i) D->getTrailingObjects()[i] = Record.readTemplateParameterList(); D->NextFriend = readDeclID().getRawValue(); D->UnsupportedFriend = (Record.readInt() != 0); D->FriendLoc = readSourceLocation(); } void ASTDeclReader::VisitFriendTemplateDecl(FriendTemplateDecl *D) { VisitDecl(D); unsigned NumParams = Record.readInt(); D->NumParams = NumParams; D->Params = new (Reader.getContext()) TemplateParameterList *[NumParams]; for (unsigned i = 0; i != NumParams; ++i) D->Params[i] = Record.readTemplateParameterList(); if (Record.readInt()) // HasFriendDecl D->Friend = readDeclAs(); else D->Friend = readTypeSourceInfo(); D->FriendLoc = readSourceLocation(); } void ASTDeclReader::VisitTemplateDecl(TemplateDecl *D) { VisitNamedDecl(D); assert(!D->TemplateParams && "TemplateParams already set!"); D->TemplateParams = Record.readTemplateParameterList(); D->init(readDeclAs()); } void ASTDeclReader::VisitConceptDecl(ConceptDecl *D) { VisitTemplateDecl(D); D->ConstraintExpr = Record.readExpr(); mergeMergeable(D); } void ASTDeclReader::VisitImplicitConceptSpecializationDecl( ImplicitConceptSpecializationDecl *D) { // The size of the template list was read during creation of the Decl, so we // don't have to re-read it here. VisitDecl(D); llvm::SmallVector Args; for (unsigned I = 0; I < D->NumTemplateArgs; ++I) Args.push_back(Record.readTemplateArgument(/*Canonicalize=*/true)); D->setTemplateArguments(Args); } void ASTDeclReader::VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D) { } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarable(D); // Make sure we've allocated the Common pointer first. We do this before // VisitTemplateDecl so that getCommonPtr() can be used during initialization. RedeclarableTemplateDecl *CanonD = D->getCanonicalDecl(); if (!CanonD->Common) { CanonD->Common = CanonD->newCommon(Reader.getContext()); Reader.PendingDefinitions.insert(CanonD); } D->Common = CanonD->Common; // If this is the first declaration of the template, fill in the information // for the 'common' pointer. if (ThisDeclID == Redecl.getFirstID()) { if (auto *RTD = readDeclAs()) { assert(RTD->getKind() == D->getKind() && "InstantiatedFromMemberTemplate kind mismatch"); D->setInstantiatedFromMemberTemplate(RTD); if (Record.readInt()) D->setMemberSpecialization(); } } VisitTemplateDecl(D); D->IdentifierNamespace = Record.readInt(); return Redecl; } void ASTDeclReader::VisitClassTemplateDecl(ClassTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D); mergeRedeclarableTemplate(D, Redecl); if (ThisDeclID == Redecl.getFirstID()) { // This ClassTemplateDecl owns a CommonPtr; read it to keep track of all of // the specializations. SmallVector SpecIDs; readDeclIDList(SpecIDs); ASTDeclReader::AddLazySpecializations(D, SpecIDs); } if (D->getTemplatedDecl()->TemplateOrInstantiation) { // We were loaded before our templated declaration was. We've not set up // its corresponding type yet (see VisitCXXRecordDeclImpl), so reconstruct // it now. 
Reader.getContext().getInjectedClassNameType( D->getTemplatedDecl(), D->getInjectedClassNameSpecialization()); } } void ASTDeclReader::VisitBuiltinTemplateDecl(BuiltinTemplateDecl *D) { llvm_unreachable("BuiltinTemplates are not serialized"); } /// TODO: Unify with ClassTemplateDecl version? /// May require unifying ClassTemplateDecl and /// VarTemplateDecl beyond TemplateDecl... void ASTDeclReader::VisitVarTemplateDecl(VarTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D); mergeRedeclarableTemplate(D, Redecl); if (ThisDeclID == Redecl.getFirstID()) { // This VarTemplateDecl owns a CommonPtr; read it to keep track of all of // the specializations. SmallVector SpecIDs; readDeclIDList(SpecIDs); ASTDeclReader::AddLazySpecializations(D, SpecIDs); } } ASTDeclReader::RedeclarableResult ASTDeclReader::VisitClassTemplateSpecializationDeclImpl( ClassTemplateSpecializationDecl *D) { RedeclarableResult Redecl = VisitCXXRecordDeclImpl(D); ASTContext &C = Reader.getContext(); if (Decl *InstD = readDecl()) { if (auto *CTD = dyn_cast(InstD)) { D->SpecializedTemplate = CTD; } else { SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs); TemplateArgumentList *ArgList = TemplateArgumentList::CreateCopy(C, TemplArgs); auto *PS = new (C) ClassTemplateSpecializationDecl:: SpecializedPartialSpecialization(); PS->PartialSpecialization = cast(InstD); PS->TemplateArgs = ArgList; D->SpecializedTemplate = PS; } } SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true); D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs); D->PointOfInstantiation = readSourceLocation(); D->SpecializationKind = (TemplateSpecializationKind)Record.readInt(); bool writtenAsCanonicalDecl = Record.readInt(); if (writtenAsCanonicalDecl) { auto *CanonPattern = readDeclAs(); if (D->isCanonicalDecl()) { // It's kept in the folding set. // Set this as, or find, the canonical declaration for this specialization ClassTemplateSpecializationDecl *CanonSpec; if (auto *Partial = dyn_cast(D)) { CanonSpec = CanonPattern->getCommonPtr()->PartialSpecializations .GetOrInsertNode(Partial); } else { CanonSpec = CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D); } // If there was already a canonical specialization, merge into it. if (CanonSpec != D) { mergeRedeclarable(D, CanonSpec, Redecl); // This declaration might be a definition. Merge with any existing // definition. if (auto *DDD = D->DefinitionData) { if (CanonSpec->DefinitionData) MergeDefinitionData(CanonSpec, std::move(*DDD)); else CanonSpec->DefinitionData = D->DefinitionData; } D->DefinitionData = CanonSpec->DefinitionData; } } } // extern/template keyword locations for explicit instantiations if (Record.readBool()) { auto *ExplicitInfo = new (C) ExplicitInstantiationInfo; ExplicitInfo->ExternKeywordLoc = readSourceLocation(); ExplicitInfo->TemplateKeywordLoc = readSourceLocation(); D->ExplicitInfo = ExplicitInfo; } if (Record.readBool()) D->setTemplateArgsAsWritten(Record.readASTTemplateArgumentListInfo()); return Redecl; } void ASTDeclReader::VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D) { // We need to read the template params first because redeclarable is going to // need them for profiling TemplateParameterList *Params = Record.readTemplateParameterList(); D->TemplateParams = Params; RedeclarableResult Redecl = VisitClassTemplateSpecializationDeclImpl(D); // These are read/set from/to the first declaration. 
if (ThisDeclID == Redecl.getFirstID()) { D->InstantiatedFromMember.setPointer( readDeclAs()); D->InstantiatedFromMember.setInt(Record.readInt()); } } void ASTDeclReader::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D); if (ThisDeclID == Redecl.getFirstID()) { // This FunctionTemplateDecl owns a CommonPtr; read it. SmallVector SpecIDs; readDeclIDList(SpecIDs); ASTDeclReader::AddLazySpecializations(D, SpecIDs); } } /// TODO: Unify with ClassTemplateSpecializationDecl version? /// May require unifying ClassTemplate(Partial)SpecializationDecl and /// VarTemplate(Partial)SpecializationDecl with a new data /// structure Template(Partial)SpecializationDecl, and /// using Template(Partial)SpecializationDecl as input type. ASTDeclReader::RedeclarableResult ASTDeclReader::VisitVarTemplateSpecializationDeclImpl( VarTemplateSpecializationDecl *D) { ASTContext &C = Reader.getContext(); if (Decl *InstD = readDecl()) { if (auto *VTD = dyn_cast(InstD)) { D->SpecializedTemplate = VTD; } else { SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs); TemplateArgumentList *ArgList = TemplateArgumentList::CreateCopy( C, TemplArgs); auto *PS = new (C) VarTemplateSpecializationDecl::SpecializedPartialSpecialization(); PS->PartialSpecialization = cast(InstD); PS->TemplateArgs = ArgList; D->SpecializedTemplate = PS; } } // extern/template keyword locations for explicit instantiations if (Record.readBool()) { auto *ExplicitInfo = new (C) ExplicitInstantiationInfo; ExplicitInfo->ExternKeywordLoc = readSourceLocation(); ExplicitInfo->TemplateKeywordLoc = readSourceLocation(); D->ExplicitInfo = ExplicitInfo; } if (Record.readBool()) D->setTemplateArgsAsWritten(Record.readASTTemplateArgumentListInfo()); SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs, /*Canonicalize*/ true); D->TemplateArgs = TemplateArgumentList::CreateCopy(C, TemplArgs); D->PointOfInstantiation = readSourceLocation(); D->SpecializationKind = (TemplateSpecializationKind)Record.readInt(); D->IsCompleteDefinition = Record.readInt(); RedeclarableResult Redecl = VisitVarDeclImpl(D); bool writtenAsCanonicalDecl = Record.readInt(); if (writtenAsCanonicalDecl) { auto *CanonPattern = readDeclAs(); if (D->isCanonicalDecl()) { // It's kept in the folding set. VarTemplateSpecializationDecl *CanonSpec; if (auto *Partial = dyn_cast(D)) { CanonSpec = CanonPattern->getCommonPtr() ->PartialSpecializations.GetOrInsertNode(Partial); } else { CanonSpec = CanonPattern->getCommonPtr()->Specializations.GetOrInsertNode(D); } // If we already have a matching specialization, merge it. if (CanonSpec != D) mergeRedeclarable(D, CanonSpec, Redecl); } } return Redecl; } /// TODO: Unify with ClassTemplatePartialSpecializationDecl version? /// May require unifying ClassTemplate(Partial)SpecializationDecl and /// VarTemplate(Partial)SpecializationDecl with a new data /// structure Template(Partial)SpecializationDecl, and /// using Template(Partial)SpecializationDecl as input type. void ASTDeclReader::VisitVarTemplatePartialSpecializationDecl( VarTemplatePartialSpecializationDecl *D) { TemplateParameterList *Params = Record.readTemplateParameterList(); D->TemplateParams = Params; RedeclarableResult Redecl = VisitVarTemplateSpecializationDeclImpl(D); // These are read/set from/to the first declaration. 
  if (ThisDeclID == Redecl.getFirstID()) {
    D->InstantiatedFromMember.setPointer(
        readDeclAs<VarTemplatePartialSpecializationDecl>());
    D->InstantiatedFromMember.setInt(Record.readInt());
  }
}

void ASTDeclReader::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) {
  VisitTypeDecl(D);

  D->setDeclaredWithTypename(Record.readInt());

-  if (D->hasTypeConstraint()) {
+  const bool TypeConstraintInitialized = Record.readBool();
+  if (TypeConstraintInitialized && D->hasTypeConstraint()) {
    ConceptReference *CR = nullptr;
    if (Record.readBool())
      CR = Record.readConceptReference();
    Expr *ImmediatelyDeclaredConstraint = Record.readExpr();
    D->setTypeConstraint(CR, ImmediatelyDeclaredConstraint);
    if ((D->ExpandedParameterPack = Record.readInt()))
      D->NumExpanded = Record.readInt();
  }

  if (Record.readInt())
    D->setDefaultArgument(Reader.getContext(),
                          Record.readTemplateArgumentLoc());
}

void ASTDeclReader::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) {
  VisitDeclaratorDecl(D);
  // TemplateParmPosition.
  D->setDepth(Record.readInt());
  D->setPosition(Record.readInt());
  if (D->hasPlaceholderTypeConstraint())
    D->setPlaceholderTypeConstraint(Record.readExpr());
  if (D->isExpandedParameterPack()) {
    auto TypesAndInfos =
        D->getTrailingObjects<std::pair<QualType, TypeSourceInfo *>>();
    for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) {
      new (&TypesAndInfos[I].first) QualType(Record.readType());
      TypesAndInfos[I].second = readTypeSourceInfo();
    }
  } else {
    // Rest of NonTypeTemplateParmDecl.
    D->ParameterPack = Record.readInt();
    if (Record.readInt())
      D->setDefaultArgument(Reader.getContext(),
                            Record.readTemplateArgumentLoc());
  }
}

void ASTDeclReader::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) {
  VisitTemplateDecl(D);
  D->setDeclaredWithTypename(Record.readBool());
  // TemplateParmPosition.
  D->setDepth(Record.readInt());
  D->setPosition(Record.readInt());
  if (D->isExpandedParameterPack()) {
    auto **Data = D->getTrailingObjects<TemplateParameterList *>();
    for (unsigned I = 0, N = D->getNumExpansionTemplateParameters();
         I != N; ++I)
      Data[I] = Record.readTemplateParameterList();
  } else {
    // Rest of TemplateTemplateParmDecl.
D->ParameterPack = Record.readInt(); if (Record.readInt()) D->setDefaultArgument(Reader.getContext(), Record.readTemplateArgumentLoc()); } } void ASTDeclReader::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) { RedeclarableResult Redecl = VisitRedeclarableTemplateDecl(D); mergeRedeclarableTemplate(D, Redecl); } void ASTDeclReader::VisitStaticAssertDecl(StaticAssertDecl *D) { VisitDecl(D); D->AssertExprAndFailed.setPointer(Record.readExpr()); D->AssertExprAndFailed.setInt(Record.readInt()); D->Message = cast_or_null(Record.readExpr()); D->RParenLoc = readSourceLocation(); } void ASTDeclReader::VisitEmptyDecl(EmptyDecl *D) { VisitDecl(D); } void ASTDeclReader::VisitLifetimeExtendedTemporaryDecl( LifetimeExtendedTemporaryDecl *D) { VisitDecl(D); D->ExtendingDecl = readDeclAs(); D->ExprWithTemporary = Record.readStmt(); if (Record.readInt()) { D->Value = new (D->getASTContext()) APValue(Record.readAPValue()); D->getASTContext().addDestruction(D->Value); } D->ManglingNumber = Record.readInt(); mergeMergeable(D); } std::pair ASTDeclReader::VisitDeclContext(DeclContext *DC) { uint64_t LexicalOffset = ReadLocalOffset(); uint64_t VisibleOffset = ReadLocalOffset(); return std::make_pair(LexicalOffset, VisibleOffset); } template ASTDeclReader::RedeclarableResult ASTDeclReader::VisitRedeclarable(Redeclarable *D) { GlobalDeclID FirstDeclID = readDeclID(); Decl *MergeWith = nullptr; bool IsKeyDecl = ThisDeclID == FirstDeclID; bool IsFirstLocalDecl = false; uint64_t RedeclOffset = 0; // invalid FirstDeclID indicates that this declaration was the only // declaration of its entity, and is used for space optimization. if (FirstDeclID.isInvalid()) { FirstDeclID = ThisDeclID; IsKeyDecl = true; IsFirstLocalDecl = true; } else if (unsigned N = Record.readInt()) { // This declaration was the first local declaration, but may have imported // other declarations. IsKeyDecl = N == 1; IsFirstLocalDecl = true; // We have some declarations that must be before us in our redeclaration // chain. Read them now, and remember that we ought to merge with one of // them. // FIXME: Provide a known merge target to the second and subsequent such // declaration. for (unsigned I = 0; I != N - 1; ++I) MergeWith = readDecl(); RedeclOffset = ReadLocalOffset(); } else { // This declaration was not the first local declaration. Read the first // local declaration now, to trigger the import of other redeclarations. (void)readDecl(); } auto *FirstDecl = cast_or_null(Reader.GetDecl(FirstDeclID)); if (FirstDecl != D) { // We delay loading of the redeclaration chain to avoid deeply nested calls. // We temporarily set the first (canonical) declaration as the previous one // which is the one that matters and mark the real previous DeclID to be // loaded & attached later on. D->RedeclLink = Redeclarable::PreviousDeclLink(FirstDecl); D->First = FirstDecl->getCanonicalDecl(); } auto *DAsT = static_cast(D); // Note that we need to load local redeclarations of this decl and build a // decl chain for them. This must happen *after* we perform the preloading // above; this ensures that the redeclaration chain is built in the correct // order. if (IsFirstLocalDecl) Reader.PendingDeclChains.push_back(std::make_pair(DAsT, RedeclOffset)); return RedeclarableResult(MergeWith, FirstDeclID, IsKeyDecl); } /// Attempts to merge the given declaration (D) with another declaration /// of the same entity. 
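/// Merging is attempted only when modules are enabled and only for the first
/// (canonical) declaration in the chain; a merge target already recorded in
/// \p Redecl is preferred, and findExisting is consulted otherwise.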
template void ASTDeclReader::mergeRedeclarable(Redeclarable *DBase, RedeclarableResult &Redecl) { // If modules are not available, there is no reason to perform this merge. if (!Reader.getContext().getLangOpts().Modules) return; // If we're not the canonical declaration, we don't need to merge. if (!DBase->isFirstDecl()) return; auto *D = static_cast(DBase); if (auto *Existing = Redecl.getKnownMergeTarget()) // We already know of an existing declaration we should merge with. mergeRedeclarable(D, cast(Existing), Redecl); else if (FindExistingResult ExistingRes = findExisting(D)) if (T *Existing = ExistingRes) mergeRedeclarable(D, Existing, Redecl); } /// Attempt to merge D with a previous declaration of the same lambda, which is /// found by its index within its context declaration, if it has one. /// /// We can't look up lambdas in their enclosing lexical or semantic context in /// general, because for lambdas in variables, both of those might be a /// namespace or the translation unit. void ASTDeclReader::mergeLambda(CXXRecordDecl *D, RedeclarableResult &Redecl, Decl *Context, unsigned IndexInContext) { // If we don't have a mangling context, treat this like any other // declaration. if (!Context) return mergeRedeclarable(D, Redecl); // If modules are not available, there is no reason to perform this merge. if (!Reader.getContext().getLangOpts().Modules) return; // If we're not the canonical declaration, we don't need to merge. if (!D->isFirstDecl()) return; if (auto *Existing = Redecl.getKnownMergeTarget()) // We already know of an existing declaration we should merge with. mergeRedeclarable(D, cast(Existing), Redecl); // Look up this lambda to see if we've seen it before. If so, merge with the // one we already loaded. NamedDecl *&Slot = Reader.LambdaDeclarationsForMerging[{ Context->getCanonicalDecl(), IndexInContext}]; if (Slot) mergeRedeclarable(D, cast(Slot), Redecl); else Slot = D; } void ASTDeclReader::mergeRedeclarableTemplate(RedeclarableTemplateDecl *D, RedeclarableResult &Redecl) { mergeRedeclarable(D, Redecl); // If we merged the template with a prior declaration chain, merge the // common pointer. // FIXME: Actually merge here, don't just overwrite. D->Common = D->getCanonicalDecl()->Common; } /// "Cast" to type T, asserting if we don't have an implicit conversion. /// We use this to put code in a template that will only be valid for certain /// instantiations. template static T assert_cast(T t) { return t; } template static T assert_cast(...) { llvm_unreachable("bad assert_cast"); } /// Merge together the pattern declarations from two template /// declarations. void ASTDeclReader::mergeTemplatePattern(RedeclarableTemplateDecl *D, RedeclarableTemplateDecl *Existing, bool IsKeyDecl) { auto *DPattern = D->getTemplatedDecl(); auto *ExistingPattern = Existing->getTemplatedDecl(); RedeclarableResult Result( /*MergeWith*/ ExistingPattern, DPattern->getCanonicalDecl()->getGlobalID(), IsKeyDecl); if (auto *DClass = dyn_cast(DPattern)) { // Merge with any existing definition. // FIXME: This is duplicated in several places. Refactor. auto *ExistingClass = cast(ExistingPattern)->getCanonicalDecl(); if (auto *DDD = DClass->DefinitionData) { if (ExistingClass->DefinitionData) { MergeDefinitionData(ExistingClass, std::move(*DDD)); } else { ExistingClass->DefinitionData = DClass->DefinitionData; // We may have skipped this before because we thought that DClass // was the canonical declaration. 
Reader.PendingDefinitions.insert(DClass); } } DClass->DefinitionData = ExistingClass->DefinitionData; return mergeRedeclarable(DClass, cast(ExistingPattern), Result); } if (auto *DFunction = dyn_cast(DPattern)) return mergeRedeclarable(DFunction, cast(ExistingPattern), Result); if (auto *DVar = dyn_cast(DPattern)) return mergeRedeclarable(DVar, cast(ExistingPattern), Result); if (auto *DAlias = dyn_cast(DPattern)) return mergeRedeclarable(DAlias, cast(ExistingPattern), Result); llvm_unreachable("merged an unknown kind of redeclarable template"); } /// Attempts to merge the given declaration (D) with another declaration /// of the same entity. template void ASTDeclReader::mergeRedeclarable(Redeclarable *DBase, T *Existing, RedeclarableResult &Redecl) { auto *D = static_cast(DBase); T *ExistingCanon = Existing->getCanonicalDecl(); T *DCanon = D->getCanonicalDecl(); if (ExistingCanon != DCanon) { // Have our redeclaration link point back at the canonical declaration // of the existing declaration, so that this declaration has the // appropriate canonical declaration. D->RedeclLink = Redeclarable::PreviousDeclLink(ExistingCanon); D->First = ExistingCanon; ExistingCanon->Used |= D->Used; D->Used = false; // When we merge a template, merge its pattern. if (auto *DTemplate = dyn_cast(D)) mergeTemplatePattern( DTemplate, assert_cast(ExistingCanon), Redecl.isKeyDecl()); // If this declaration is a key declaration, make a note of that. if (Redecl.isKeyDecl()) Reader.KeyDecls[ExistingCanon].push_back(Redecl.getFirstID()); } } /// ODR-like semantics for C/ObjC allow us to merge tag types and a structural /// check in Sema guarantees the types can be merged (see C11 6.2.7/1 or C89 /// 6.1.2.6/1). Although most merging is done in Sema, we need to guarantee /// that some types are mergeable during deserialization, otherwise name /// lookup fails. This is the case for EnumConstantDecl. static bool allowODRLikeMergeInC(NamedDecl *ND) { if (!ND) return false; // TODO: implement merge for other necessary decls. if (isa(ND)) return true; return false; } /// Attempts to merge LifetimeExtendedTemporaryDecl with /// identical class definitions from two different modules. void ASTDeclReader::mergeMergeable(LifetimeExtendedTemporaryDecl *D) { // If modules are not available, there is no reason to perform this merge. if (!Reader.getContext().getLangOpts().Modules) return; LifetimeExtendedTemporaryDecl *LETDecl = D; LifetimeExtendedTemporaryDecl *&LookupResult = Reader.LETemporaryForMerging[std::make_pair( LETDecl->getExtendingDecl(), LETDecl->getManglingNumber())]; if (LookupResult) Reader.getContext().setPrimaryMergedDecl(LETDecl, LookupResult->getCanonicalDecl()); else LookupResult = LETDecl; } /// Attempts to merge the given declaration (D) with another declaration /// of the same entity, for the case where the entity is not actually /// redeclarable. This happens, for instance, when merging the fields of /// identical class definitions from two different modules. template void ASTDeclReader::mergeMergeable(Mergeable *D) { // If modules are not available, there is no reason to perform this merge. if (!Reader.getContext().getLangOpts().Modules) return; // ODR-based merging is performed in C++ and in some cases (tag types) in C. // Note that C identically-named things in different translation units are // not redeclarations, but may still have compatible types, where ODR-like // semantics may apply. 
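  // In C, only the declaration kinds accepted by allowODRLikeMergeInC above
  // (currently just enum constants) take part in this merging.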
if (!Reader.getContext().getLangOpts().CPlusPlus && !allowODRLikeMergeInC(dyn_cast(static_cast(D)))) return; if (FindExistingResult ExistingRes = findExisting(static_cast(D))) if (T *Existing = ExistingRes) Reader.getContext().setPrimaryMergedDecl(static_cast(D), Existing->getCanonicalDecl()); } void ASTDeclReader::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) { Record.readOMPChildren(D->Data); VisitDecl(D); } void ASTDeclReader::VisitOMPAllocateDecl(OMPAllocateDecl *D) { Record.readOMPChildren(D->Data); VisitDecl(D); } void ASTDeclReader::VisitOMPRequiresDecl(OMPRequiresDecl * D) { Record.readOMPChildren(D->Data); VisitDecl(D); } void ASTDeclReader::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) { VisitValueDecl(D); D->setLocation(readSourceLocation()); Expr *In = Record.readExpr(); Expr *Out = Record.readExpr(); D->setCombinerData(In, Out); Expr *Combiner = Record.readExpr(); D->setCombiner(Combiner); Expr *Orig = Record.readExpr(); Expr *Priv = Record.readExpr(); D->setInitializerData(Orig, Priv); Expr *Init = Record.readExpr(); auto IK = static_cast(Record.readInt()); D->setInitializer(Init, IK); D->PrevDeclInScope = readDeclID().getRawValue(); } void ASTDeclReader::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) { Record.readOMPChildren(D->Data); VisitValueDecl(D); D->VarName = Record.readDeclarationName(); D->PrevDeclInScope = readDeclID().getRawValue(); } void ASTDeclReader::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) { VisitVarDecl(D); } //===----------------------------------------------------------------------===// // Attribute Reading //===----------------------------------------------------------------------===// namespace { class AttrReader { ASTRecordReader &Reader; public: AttrReader(ASTRecordReader &Reader) : Reader(Reader) {} uint64_t readInt() { return Reader.readInt(); } bool readBool() { return Reader.readBool(); } SourceRange readSourceRange() { return Reader.readSourceRange(); } SourceLocation readSourceLocation() { return Reader.readSourceLocation(); } Expr *readExpr() { return Reader.readExpr(); } Attr *readAttr() { return Reader.readAttr(); } std::string readString() { return Reader.readString(); } TypeSourceInfo *readTypeSourceInfo() { return Reader.readTypeSourceInfo(); } IdentifierInfo *readIdentifier() { return Reader.readIdentifier(); } VersionTuple readVersionTuple() { return Reader.readVersionTuple(); } OMPTraitInfo *readOMPTraitInfo() { return Reader.readOMPTraitInfo(); } template T *readDeclAs() { return Reader.readDeclAs(); } }; } Attr *ASTRecordReader::readAttr() { AttrReader Record(*this); auto V = Record.readInt(); if (!V) return nullptr; Attr *New = nullptr; // Kind is stored as a 1-based integer because 0 is used to indicate a null // Attr pointer. 
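  // The fields that follow (attribute and scope names, ranges, parsed kind,
  // syntax and spelling index) rebuild the AttributeCommonInfo; the
  // attribute-specific arguments are then decoded by the generated
  // AttrPCHRead.inc code.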
auto Kind = static_cast(V - 1); ASTContext &Context = getContext(); IdentifierInfo *AttrName = Record.readIdentifier(); IdentifierInfo *ScopeName = Record.readIdentifier(); SourceRange AttrRange = Record.readSourceRange(); SourceLocation ScopeLoc = Record.readSourceLocation(); unsigned ParsedKind = Record.readInt(); unsigned Syntax = Record.readInt(); unsigned SpellingIndex = Record.readInt(); bool IsAlignas = (ParsedKind == AttributeCommonInfo::AT_Aligned && Syntax == AttributeCommonInfo::AS_Keyword && SpellingIndex == AlignedAttr::Keyword_alignas); bool IsRegularKeywordAttribute = Record.readBool(); AttributeCommonInfo Info(AttrName, ScopeName, AttrRange, ScopeLoc, AttributeCommonInfo::Kind(ParsedKind), {AttributeCommonInfo::Syntax(Syntax), SpellingIndex, IsAlignas, IsRegularKeywordAttribute}); #include "clang/Serialization/AttrPCHRead.inc" assert(New && "Unable to decode attribute?"); return New; } /// Reads attributes from the current stream position. void ASTRecordReader::readAttributes(AttrVec &Attrs) { for (unsigned I = 0, E = readInt(); I != E; ++I) if (auto *A = readAttr()) Attrs.push_back(A); } //===----------------------------------------------------------------------===// // ASTReader Implementation //===----------------------------------------------------------------------===// /// Note that we have loaded the declaration with the given /// Index. /// /// This routine notes that this declaration has already been loaded, /// so that future GetDecl calls will return this declaration rather /// than trying to load a new declaration. inline void ASTReader::LoadedDecl(unsigned Index, Decl *D) { assert(!DeclsLoaded[Index] && "Decl loaded twice?"); DeclsLoaded[Index] = D; } /// Determine whether the consumer will be interested in seeing /// this declaration (via HandleTopLevelDecl). /// /// This routine should return true for anything that might affect /// code generation, e.g., inline function definitions, Objective-C /// declarations with metadata, etc. bool ASTReader::isConsumerInterestedIn(Decl *D) { // An ObjCMethodDecl is never considered as "interesting" because its // implementation container always is. // An ImportDecl or VarDecl imported from a module map module will get // emitted when we import the relevant module. if (isPartOfPerModuleInitializer(D)) { auto *M = D->getImportedOwningModule(); if (M && M->Kind == Module::ModuleMapModule && getContext().DeclMustBeEmitted(D)) return false; } if (isa(D)) return true; if (isa(D)) return !D->getDeclContext()->isFunctionOrMethod(); if (const auto *Var = dyn_cast(D)) return Var->isFileVarDecl() && (Var->isThisDeclarationADefinition() == VarDecl::Definition || OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Var)); if (const auto *Func = dyn_cast(D)) return Func->doesThisDeclarationHaveABody() || PendingBodies.count(D); if (auto *ES = D->getASTContext().getExternalSource()) if (ES->hasExternalDefinitions(D) == ExternalASTSource::EK_Never) return true; return false; } /// Get the correct cursor and offset for loading a declaration. 
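/// The global ID is mapped to its owning module file; the local declaration
/// index then selects the DeclOffsets entry, whose bit offset is rebased
/// against that file's DeclsBlockStartOffset.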
ASTReader::RecordLocation ASTReader::DeclCursorForID(GlobalDeclID ID, SourceLocation &Loc) { ModuleFile *M = getOwningModuleFile(ID); assert(M); unsigned LocalDeclIndex = ID.getLocalDeclIndex(); const DeclOffset &DOffs = M->DeclOffsets[LocalDeclIndex]; Loc = ReadSourceLocation(*M, DOffs.getRawLoc()); return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset)); } ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) { auto I = GlobalBitOffsetsMap.find(GlobalOffset); assert(I != GlobalBitOffsetsMap.end() && "Corrupted global bit offsets map"); return RecordLocation(I->second, GlobalOffset - I->second->GlobalBitOffset); } uint64_t ASTReader::getGlobalBitOffset(ModuleFile &M, uint64_t LocalOffset) { return LocalOffset + M.GlobalBitOffset; } CXXRecordDecl * ASTDeclReader::getOrFakePrimaryClassDefinition(ASTReader &Reader, CXXRecordDecl *RD) { // Try to dig out the definition. auto *DD = RD->DefinitionData; if (!DD) DD = RD->getCanonicalDecl()->DefinitionData; // If there's no definition yet, then DC's definition is added by an update // record, but we've not yet loaded that update record. In this case, we // commit to DC being the canonical definition now, and will fix this when // we load the update record. if (!DD) { DD = new (Reader.getContext()) struct CXXRecordDecl::DefinitionData(RD); RD->setCompleteDefinition(true); RD->DefinitionData = DD; RD->getCanonicalDecl()->DefinitionData = DD; // Track that we did this horrible thing so that we can fix it later. Reader.PendingFakeDefinitionData.insert( std::make_pair(DD, ASTReader::PendingFakeDefinitionKind::Fake)); } return DD->Definition; } /// Find the context in which we should search for previous declarations when /// looking for declarations to merge. DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader, DeclContext *DC) { if (auto *ND = dyn_cast(DC)) return ND->getFirstDecl(); if (auto *RD = dyn_cast(DC)) return getOrFakePrimaryClassDefinition(Reader, RD); if (auto *RD = dyn_cast(DC)) return RD->getDefinition(); if (auto *ED = dyn_cast(DC)) return ED->getDefinition(); if (auto *OID = dyn_cast(DC)) return OID->getDefinition(); // We can see the TU here only if we have no Sema object. It is possible // we're in clang-repl so we still need to get the primary context. if (auto *TU = dyn_cast(DC)) return TU->getPrimaryContext(); return nullptr; } ASTDeclReader::FindExistingResult::~FindExistingResult() { // Record that we had a typedef name for linkage whether or not we merge // with that declaration. if (TypedefNameForLinkage) { DeclContext *DC = New->getDeclContext()->getRedeclContext(); Reader.ImportedTypedefNamesForLinkage.insert( std::make_pair(std::make_pair(DC, TypedefNameForLinkage), New)); return; } if (!AddResult || Existing) return; DeclarationName Name = New->getDeclName(); DeclContext *DC = New->getDeclContext()->getRedeclContext(); if (needsAnonymousDeclarationNumber(New)) { setAnonymousDeclForMerging(Reader, New->getLexicalDeclContext(), AnonymousDeclNumber, New); } else if (DC->isTranslationUnit() && !Reader.getContext().getLangOpts().CPlusPlus) { if (Reader.getIdResolver().tryAddTopLevelDecl(New, Name)) Reader.PendingFakeLookupResults[Name.getAsIdentifierInfo()] .push_back(New); } else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) { // Add the declaration to its redeclaration context so later merging // lookups will find it. 
MergeDC->makeDeclVisibleInContextImpl(New, /*Internal*/true); } } /// Find the declaration that should be merged into, given the declaration found /// by name lookup. If we're merging an anonymous declaration within a typedef, /// we need a matching typedef, and we merge with the type inside it. static NamedDecl *getDeclForMerging(NamedDecl *Found, bool IsTypedefNameForLinkage) { if (!IsTypedefNameForLinkage) return Found; // If we found a typedef declaration that gives a name to some other // declaration, then we want that inner declaration. Declarations from // AST files are handled via ImportedTypedefNamesForLinkage. if (Found->isFromASTFile()) return nullptr; if (auto *TND = dyn_cast(Found)) return TND->getAnonDeclWithTypedefName(/*AnyRedecl*/true); return nullptr; } /// Find the declaration to use to populate the anonymous declaration table /// for the given lexical DeclContext. We only care about finding local /// definitions of the context; we'll merge imported ones as we go. DeclContext * ASTDeclReader::getPrimaryDCForAnonymousDecl(DeclContext *LexicalDC) { // For classes, we track the definition as we merge. if (auto *RD = dyn_cast(LexicalDC)) { auto *DD = RD->getCanonicalDecl()->DefinitionData; return DD ? DD->Definition : nullptr; } else if (auto *OID = dyn_cast(LexicalDC)) { return OID->getCanonicalDecl()->getDefinition(); } // For anything else, walk its merged redeclarations looking for a definition. // Note that we can't just call getDefinition here because the redeclaration // chain isn't wired up. for (auto *D : merged_redecls(cast(LexicalDC))) { if (auto *FD = dyn_cast(D)) if (FD->isThisDeclarationADefinition()) return FD; if (auto *MD = dyn_cast(D)) if (MD->isThisDeclarationADefinition()) return MD; if (auto *RD = dyn_cast(D)) if (RD->isThisDeclarationADefinition()) return RD; } // No merged definition yet. return nullptr; } NamedDecl *ASTDeclReader::getAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index) { // If the lexical context has been merged, look into the now-canonical // definition. auto *CanonDC = cast(DC)->getCanonicalDecl(); // If we've seen this before, return the canonical declaration. auto &Previous = Reader.AnonymousDeclarationsForMerging[CanonDC]; if (Index < Previous.size() && Previous[Index]) return Previous[Index]; // If this is the first time, but we have parsed a declaration of the context, // build the anonymous declaration list from the parsed declaration. auto *PrimaryDC = getPrimaryDCForAnonymousDecl(DC); if (PrimaryDC && !cast(PrimaryDC)->isFromASTFile()) { numberAnonymousDeclsWithin(PrimaryDC, [&](NamedDecl *ND, unsigned Number) { if (Previous.size() == Number) Previous.push_back(cast(ND->getCanonicalDecl())); else Previous[Number] = cast(ND->getCanonicalDecl()); }); } return Index < Previous.size() ? Previous[Index] : nullptr; } void ASTDeclReader::setAnonymousDeclForMerging(ASTReader &Reader, DeclContext *DC, unsigned Index, NamedDecl *D) { auto *CanonDC = cast(DC)->getCanonicalDecl(); auto &Previous = Reader.AnonymousDeclarationsForMerging[CanonDC]; if (Index >= Previous.size()) Previous.resize(Index + 1); if (!Previous[Index]) Previous[Index] = D; } ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) { DeclarationName Name = TypedefNameForLinkage ? TypedefNameForLinkage : D->getDeclName(); if (!Name && !needsAnonymousDeclarationNumber(D)) { // Don't bother trying to find unnamed declarations that are in // unmergeable contexts. 
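    // Produce a result that carries no existing declaration and suppress it
    // so that it is not registered for later merging lookups.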
FindExistingResult Result(Reader, D, /*Existing=*/nullptr, AnonymousDeclNumber, TypedefNameForLinkage); Result.suppress(); return Result; } ASTContext &C = Reader.getContext(); DeclContext *DC = D->getDeclContext()->getRedeclContext(); if (TypedefNameForLinkage) { auto It = Reader.ImportedTypedefNamesForLinkage.find( std::make_pair(DC, TypedefNameForLinkage)); if (It != Reader.ImportedTypedefNamesForLinkage.end()) if (C.isSameEntity(It->second, D)) return FindExistingResult(Reader, D, It->second, AnonymousDeclNumber, TypedefNameForLinkage); // Go on to check in other places in case an existing typedef name // was not imported. } if (needsAnonymousDeclarationNumber(D)) { // This is an anonymous declaration that we may need to merge. Look it up // in its context by number. if (auto *Existing = getAnonymousDeclForMerging( Reader, D->getLexicalDeclContext(), AnonymousDeclNumber)) if (C.isSameEntity(Existing, D)) return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber, TypedefNameForLinkage); } else if (DC->isTranslationUnit() && !Reader.getContext().getLangOpts().CPlusPlus) { IdentifierResolver &IdResolver = Reader.getIdResolver(); // Temporarily consider the identifier to be up-to-date. We don't want to // cause additional lookups here. class UpToDateIdentifierRAII { IdentifierInfo *II; bool WasOutToDate = false; public: explicit UpToDateIdentifierRAII(IdentifierInfo *II) : II(II) { if (II) { WasOutToDate = II->isOutOfDate(); if (WasOutToDate) II->setOutOfDate(false); } } ~UpToDateIdentifierRAII() { if (WasOutToDate) II->setOutOfDate(true); } } UpToDate(Name.getAsIdentifierInfo()); for (IdentifierResolver::iterator I = IdResolver.begin(Name), IEnd = IdResolver.end(); I != IEnd; ++I) { if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage)) if (C.isSameEntity(Existing, D)) return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber, TypedefNameForLinkage); } } else if (DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC)) { DeclContext::lookup_result R = MergeDC->noload_lookup(Name); for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E; ++I) { if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage)) if (C.isSameEntity(Existing, D)) return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber, TypedefNameForLinkage); } } else { // Not in a mergeable context. return FindExistingResult(Reader); } // If this declaration is from a merged context, make a note that we need to // check that the canonical definition of that context contains the decl. // // Note that we don't perform ODR checks for decls from the global module // fragment. // // FIXME: We should do something similar if we merge two definitions of the // same template specialization into the same CXXRecordDecl. auto MergedDCIt = Reader.MergedDeclContexts.find(D->getLexicalDeclContext()); if (MergedDCIt != Reader.MergedDeclContexts.end() && !shouldSkipCheckingODR(D) && MergedDCIt->second == D->getDeclContext()) Reader.PendingOdrMergeChecks.push_back(D); return FindExistingResult(Reader, D, /*Existing=*/nullptr, AnonymousDeclNumber, TypedefNameForLinkage); } template Decl *ASTDeclReader::getMostRecentDeclImpl(Redeclarable *D) { return D->RedeclLink.getLatestNotUpdated(); } Decl *ASTDeclReader::getMostRecentDeclImpl(...) 
{ llvm_unreachable("getMostRecentDecl on non-redeclarable declaration"); } Decl *ASTDeclReader::getMostRecentDecl(Decl *D) { assert(D); switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ return getMostRecentDeclImpl(cast(D)); #include "clang/AST/DeclNodes.inc" } llvm_unreachable("unknown decl kind"); } Decl *ASTReader::getMostRecentExistingDecl(Decl *D) { return ASTDeclReader::getMostRecentDecl(D->getCanonicalDecl()); } void ASTDeclReader::mergeInheritableAttributes(ASTReader &Reader, Decl *D, Decl *Previous) { InheritableAttr *NewAttr = nullptr; ASTContext &Context = Reader.getContext(); const auto *IA = Previous->getAttr(); if (IA && !D->hasAttr()) { NewAttr = cast(IA->clone(Context)); NewAttr->setInherited(true); D->addAttr(NewAttr); } const auto *AA = Previous->getAttr(); if (AA && !D->hasAttr()) { NewAttr = AA->clone(Context); NewAttr->setInherited(true); D->addAttr(NewAttr); } } template void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon) { D->RedeclLink.setPrevious(cast(Previous)); D->First = cast(Previous)->First; } namespace clang { template<> void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon) { auto *VD = static_cast(D); auto *PrevVD = cast(Previous); D->RedeclLink.setPrevious(PrevVD); D->First = PrevVD->First; // We should keep at most one definition on the chain. // FIXME: Cache the definition once we've found it. Building a chain with // N definitions currently takes O(N^2) time here. if (VD->isThisDeclarationADefinition() == VarDecl::Definition) { for (VarDecl *CurD = PrevVD; CurD; CurD = CurD->getPreviousDecl()) { if (CurD->isThisDeclarationADefinition() == VarDecl::Definition) { Reader.mergeDefinitionVisibility(CurD, VD); VD->demoteThisDefinitionToDeclaration(); break; } } } } static bool isUndeducedReturnType(QualType T) { auto *DT = T->getContainedDeducedType(); return DT && !DT->isDeduced(); } template<> void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, Redeclarable *D, Decl *Previous, Decl *Canon) { auto *FD = static_cast(D); auto *PrevFD = cast(Previous); FD->RedeclLink.setPrevious(PrevFD); FD->First = PrevFD->First; // If the previous declaration is an inline function declaration, then this // declaration is too. if (PrevFD->isInlined() != FD->isInlined()) { // FIXME: [dcl.fct.spec]p4: // If a function with external linkage is declared inline in one // translation unit, it shall be declared inline in all translation // units in which it appears. // // Be careful of this case: // // module A: // template struct X { void f(); }; // template inline void X::f() {} // // module B instantiates the declaration of X::f // module C instantiates the definition of X::f // // If module B and C are merged, we do not have a violation of this rule. FD->setImplicitlyInline(true); } auto *FPT = FD->getType()->getAs(); auto *PrevFPT = PrevFD->getType()->getAs(); if (FPT && PrevFPT) { // If we need to propagate an exception specification along the redecl // chain, make a note of that so that we can do so later. bool IsUnresolved = isUnresolvedExceptionSpec(FPT->getExceptionSpecType()); bool WasUnresolved = isUnresolvedExceptionSpec(PrevFPT->getExceptionSpecType()); if (IsUnresolved != WasUnresolved) Reader.PendingExceptionSpecUpdates.insert( {Canon, IsUnresolved ? PrevFD : FD}); // If we need to propagate a deduced return type along the redecl chain, // make a note of that so that we can do it later. 
    bool IsUndeduced = isUndeducedReturnType(FPT->getReturnType());
    bool WasUndeduced = isUndeducedReturnType(PrevFPT->getReturnType());
    if (IsUndeduced != WasUndeduced)
      Reader.PendingDeducedTypeUpdates.insert(
          {cast<FunctionDecl>(Canon),
           (IsUndeduced ? PrevFPT : FPT)->getReturnType()});
  }
}

} // namespace clang

void ASTDeclReader::attachPreviousDeclImpl(ASTReader &Reader, ...) {
  llvm_unreachable("attachPreviousDecl on non-redeclarable declaration");
}

/// Inherit the default template argument from \p From to \p To. Returns
/// \c false if there is no default template for \p From.
template <typename ParmDecl>
static bool inheritDefaultTemplateArgument(ASTContext &Context, ParmDecl *From,
                                           Decl *ToD) {
  auto *To = cast<ParmDecl>(ToD);
  if (!From->hasDefaultArgument())
    return false;
  To->setInheritedDefaultArgument(Context, From);
  return true;
}

static void inheritDefaultTemplateArguments(ASTContext &Context,
                                            TemplateDecl *From,
                                            TemplateDecl *To) {
  auto *FromTP = From->getTemplateParameters();
  auto *ToTP = To->getTemplateParameters();
  assert(FromTP->size() == ToTP->size() && "merged mismatched templates?");

  for (unsigned I = 0, N = FromTP->size(); I != N; ++I) {
    NamedDecl *FromParam = FromTP->getParam(I);
    NamedDecl *ToParam = ToTP->getParam(I);

    if (auto *FTTP = dyn_cast<TemplateTypeParmDecl>(FromParam))
      inheritDefaultTemplateArgument(Context, FTTP, ToParam);
    else if (auto *FNTTP = dyn_cast<NonTypeTemplateParmDecl>(FromParam))
      inheritDefaultTemplateArgument(Context, FNTTP, ToParam);
    else
      inheritDefaultTemplateArgument(
          Context, cast<TemplateTemplateParmDecl>(FromParam), ToParam);
  }
}

// [basic.link]/p10:
//    If two declarations of an entity are attached to different modules,
//    the program is ill-formed;
static void checkMultipleDefinitionInNamedModules(ASTReader &Reader, Decl *D,
                                                  Decl *Previous) {
  Module *M = Previous->getOwningModule();

  // We only care about the case in named modules.
  if (!M || !M->isNamedModule())
    return;

  // If it was previously introduced implicitly, it is not meaningful to
  // diagnose it.
  if (Previous->isImplicit())
    return;

  // FIXME: Get rid of the enumeration of decl types once we have an
  // appropriate abstract for decls of an entity. e.g., the namespace decl and
  // using decl don't introduce an entity.
  if (!isa<VarDecl, FunctionDecl, TagDecl, RedeclarableTemplateDecl>(Previous))
    return;

  // Skip implicit instantiations since they may give false positive
  // diagnostic messages.
  // FIXME: Maybe this shows that the implicit instantiations may have
  // incorrect module ownership. But given we've finished the compilation of a
  // module, how can we add new entities to that module?
  if (auto *VTSD = dyn_cast<VarTemplateSpecializationDecl>(Previous);
      VTSD && !VTSD->isExplicitSpecialization())
    return;
  if (auto *CTSD = dyn_cast<ClassTemplateSpecializationDecl>(Previous);
      CTSD && !CTSD->isExplicitSpecialization())
    return;
  if (auto *Func = dyn_cast<FunctionDecl>(Previous))
    if (auto *FTSI = Func->getTemplateSpecializationInfo();
        FTSI && !FTSI->isExplicitSpecialization())
      return;

  // It is fine if they are in the same module.
if (Reader.getContext().isInSameModule(M, D->getOwningModule())) return; Reader.Diag(Previous->getLocation(), diag::err_multiple_decl_in_different_modules) << cast(Previous) << M->Name; Reader.Diag(D->getLocation(), diag::note_also_found); } void ASTDeclReader::attachPreviousDecl(ASTReader &Reader, Decl *D, Decl *Previous, Decl *Canon) { assert(D && Previous); switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ attachPreviousDeclImpl(Reader, cast(D), Previous, Canon); \ break; #include "clang/AST/DeclNodes.inc" } checkMultipleDefinitionInNamedModules(Reader, D, Previous); // If the declaration was visible in one module, a redeclaration of it in // another module remains visible even if it wouldn't be visible by itself. // // FIXME: In this case, the declaration should only be visible if a module // that makes it visible has been imported. D->IdentifierNamespace |= Previous->IdentifierNamespace & (Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Type); // If the declaration declares a template, it may inherit default arguments // from the previous declaration. if (auto *TD = dyn_cast(D)) inheritDefaultTemplateArguments(Reader.getContext(), cast(Previous), TD); // If any of the declaration in the chain contains an Inheritable attribute, // it needs to be added to all the declarations in the redeclarable chain. // FIXME: Only the logic of merging MSInheritableAttr is present, it should // be extended for all inheritable attributes. mergeInheritableAttributes(Reader, D, Previous); } template void ASTDeclReader::attachLatestDeclImpl(Redeclarable *D, Decl *Latest) { D->RedeclLink.setLatest(cast(Latest)); } void ASTDeclReader::attachLatestDeclImpl(...) { llvm_unreachable("attachLatestDecl on non-redeclarable declaration"); } void ASTDeclReader::attachLatestDecl(Decl *D, Decl *Latest) { assert(D && Latest); switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ attachLatestDeclImpl(cast(D), Latest); \ break; #include "clang/AST/DeclNodes.inc" } } template void ASTDeclReader::markIncompleteDeclChainImpl(Redeclarable *D) { D->RedeclLink.markIncomplete(); } void ASTDeclReader::markIncompleteDeclChainImpl(...) { llvm_unreachable("markIncompleteDeclChain on non-redeclarable declaration"); } void ASTReader::markIncompleteDeclChain(Decl *D) { switch (D->getKind()) { #define ABSTRACT_DECL(TYPE) #define DECL(TYPE, BASE) \ case Decl::TYPE: \ ASTDeclReader::markIncompleteDeclChainImpl(cast(D)); \ break; #include "clang/AST/DeclNodes.inc" } } /// Read the declaration at the given offset from the AST file. Decl *ASTReader::ReadDeclRecord(GlobalDeclID ID) { SourceLocation DeclLoc; RecordLocation Loc = DeclCursorForID(ID, DeclLoc); llvm::BitstreamCursor &DeclsCursor = Loc.F->DeclsCursor; // Keep track of where we are in the stream, then jump back there // after reading this declaration. SavedStreamPosition SavedPosition(DeclsCursor); ReadingKindTracker ReadingKind(Read_Decl, *this); // Note that we are loading a declaration record. 
Deserializing ADecl(this); auto Fail = [](const char *what, llvm::Error &&Err) { llvm::report_fatal_error(Twine("ASTReader::readDeclRecord failed ") + what + ": " + toString(std::move(Err))); }; if (llvm::Error JumpFailed = DeclsCursor.JumpToBit(Loc.Offset)) Fail("jumping", std::move(JumpFailed)); ASTRecordReader Record(*this, *Loc.F); ASTDeclReader Reader(*this, Record, Loc, ID, DeclLoc); Expected MaybeCode = DeclsCursor.ReadCode(); if (!MaybeCode) Fail("reading code", MaybeCode.takeError()); unsigned Code = MaybeCode.get(); ASTContext &Context = getContext(); Decl *D = nullptr; Expected MaybeDeclCode = Record.readRecord(DeclsCursor, Code); if (!MaybeDeclCode) llvm::report_fatal_error( Twine("ASTReader::readDeclRecord failed reading decl code: ") + toString(MaybeDeclCode.takeError())); switch ((DeclCode)MaybeDeclCode.get()) { case DECL_CONTEXT_LEXICAL: case DECL_CONTEXT_VISIBLE: llvm_unreachable("Record cannot be de-serialized with readDeclRecord"); case DECL_TYPEDEF: D = TypedefDecl::CreateDeserialized(Context, ID); break; case DECL_TYPEALIAS: D = TypeAliasDecl::CreateDeserialized(Context, ID); break; case DECL_ENUM: D = EnumDecl::CreateDeserialized(Context, ID); break; case DECL_RECORD: D = RecordDecl::CreateDeserialized(Context, ID); break; case DECL_ENUM_CONSTANT: D = EnumConstantDecl::CreateDeserialized(Context, ID); break; case DECL_FUNCTION: D = FunctionDecl::CreateDeserialized(Context, ID); break; case DECL_LINKAGE_SPEC: D = LinkageSpecDecl::CreateDeserialized(Context, ID); break; case DECL_EXPORT: D = ExportDecl::CreateDeserialized(Context, ID); break; case DECL_LABEL: D = LabelDecl::CreateDeserialized(Context, ID); break; case DECL_NAMESPACE: D = NamespaceDecl::CreateDeserialized(Context, ID); break; case DECL_NAMESPACE_ALIAS: D = NamespaceAliasDecl::CreateDeserialized(Context, ID); break; case DECL_USING: D = UsingDecl::CreateDeserialized(Context, ID); break; case DECL_USING_PACK: D = UsingPackDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_USING_SHADOW: D = UsingShadowDecl::CreateDeserialized(Context, ID); break; case DECL_USING_ENUM: D = UsingEnumDecl::CreateDeserialized(Context, ID); break; case DECL_CONSTRUCTOR_USING_SHADOW: D = ConstructorUsingShadowDecl::CreateDeserialized(Context, ID); break; case DECL_USING_DIRECTIVE: D = UsingDirectiveDecl::CreateDeserialized(Context, ID); break; case DECL_UNRESOLVED_USING_VALUE: D = UnresolvedUsingValueDecl::CreateDeserialized(Context, ID); break; case DECL_UNRESOLVED_USING_TYPENAME: D = UnresolvedUsingTypenameDecl::CreateDeserialized(Context, ID); break; case DECL_UNRESOLVED_USING_IF_EXISTS: D = UnresolvedUsingIfExistsDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_RECORD: D = CXXRecordDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_DEDUCTION_GUIDE: D = CXXDeductionGuideDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_METHOD: D = CXXMethodDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_CONSTRUCTOR: D = CXXConstructorDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_CXX_DESTRUCTOR: D = CXXDestructorDecl::CreateDeserialized(Context, ID); break; case DECL_CXX_CONVERSION: D = CXXConversionDecl::CreateDeserialized(Context, ID); break; case DECL_ACCESS_SPEC: D = AccessSpecDecl::CreateDeserialized(Context, ID); break; case DECL_FRIEND: D = FriendDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_FRIEND_TEMPLATE: D = FriendTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_CLASS_TEMPLATE: D = 
ClassTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_CLASS_TEMPLATE_SPECIALIZATION: D = ClassTemplateSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION: D = ClassTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_VAR_TEMPLATE: D = VarTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_VAR_TEMPLATE_SPECIALIZATION: D = VarTemplateSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION: D = VarTemplatePartialSpecializationDecl::CreateDeserialized(Context, ID); break; case DECL_FUNCTION_TEMPLATE: D = FunctionTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_TEMPLATE_TYPE_PARM: { bool HasTypeConstraint = Record.readInt(); D = TemplateTypeParmDecl::CreateDeserialized(Context, ID, HasTypeConstraint); break; } case DECL_NON_TYPE_TEMPLATE_PARM: { bool HasTypeConstraint = Record.readInt(); D = NonTypeTemplateParmDecl::CreateDeserialized(Context, ID, HasTypeConstraint); break; } case DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK: { bool HasTypeConstraint = Record.readInt(); D = NonTypeTemplateParmDecl::CreateDeserialized( Context, ID, Record.readInt(), HasTypeConstraint); break; } case DECL_TEMPLATE_TEMPLATE_PARM: D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID); break; case DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK: D = TemplateTemplateParmDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_TYPE_ALIAS_TEMPLATE: D = TypeAliasTemplateDecl::CreateDeserialized(Context, ID); break; case DECL_CONCEPT: D = ConceptDecl::CreateDeserialized(Context, ID); break; case DECL_REQUIRES_EXPR_BODY: D = RequiresExprBodyDecl::CreateDeserialized(Context, ID); break; case DECL_STATIC_ASSERT: D = StaticAssertDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_METHOD: D = ObjCMethodDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_INTERFACE: D = ObjCInterfaceDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_IVAR: D = ObjCIvarDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_PROTOCOL: D = ObjCProtocolDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_AT_DEFS_FIELD: D = ObjCAtDefsFieldDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_CATEGORY: D = ObjCCategoryDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_CATEGORY_IMPL: D = ObjCCategoryImplDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_IMPLEMENTATION: D = ObjCImplementationDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_COMPATIBLE_ALIAS: D = ObjCCompatibleAliasDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_PROPERTY: D = ObjCPropertyDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_PROPERTY_IMPL: D = ObjCPropertyImplDecl::CreateDeserialized(Context, ID); break; case DECL_FIELD: D = FieldDecl::CreateDeserialized(Context, ID); break; case DECL_INDIRECTFIELD: D = IndirectFieldDecl::CreateDeserialized(Context, ID); break; case DECL_VAR: D = VarDecl::CreateDeserialized(Context, ID); break; case DECL_IMPLICIT_PARAM: D = ImplicitParamDecl::CreateDeserialized(Context, ID); break; case DECL_PARM_VAR: D = ParmVarDecl::CreateDeserialized(Context, ID); break; case DECL_DECOMPOSITION: D = DecompositionDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_BINDING: D = BindingDecl::CreateDeserialized(Context, ID); break; case DECL_FILE_SCOPE_ASM: D = FileScopeAsmDecl::CreateDeserialized(Context, ID); break; case 
DECL_TOP_LEVEL_STMT_DECL: D = TopLevelStmtDecl::CreateDeserialized(Context, ID); break; case DECL_BLOCK: D = BlockDecl::CreateDeserialized(Context, ID); break; case DECL_MS_PROPERTY: D = MSPropertyDecl::CreateDeserialized(Context, ID); break; case DECL_MS_GUID: D = MSGuidDecl::CreateDeserialized(Context, ID); break; case DECL_UNNAMED_GLOBAL_CONSTANT: D = UnnamedGlobalConstantDecl::CreateDeserialized(Context, ID); break; case DECL_TEMPLATE_PARAM_OBJECT: D = TemplateParamObjectDecl::CreateDeserialized(Context, ID); break; case DECL_CAPTURED: D = CapturedDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_CXX_BASE_SPECIFIERS: Error("attempt to read a C++ base-specifier record as a declaration"); return nullptr; case DECL_CXX_CTOR_INITIALIZERS: Error("attempt to read a C++ ctor initializer record as a declaration"); return nullptr; case DECL_IMPORT: // Note: last entry of the ImportDecl record is the number of stored source // locations. D = ImportDecl::CreateDeserialized(Context, ID, Record.back()); break; case DECL_OMP_THREADPRIVATE: { Record.skipInts(1); unsigned NumChildren = Record.readInt(); Record.skipInts(1); D = OMPThreadPrivateDecl::CreateDeserialized(Context, ID, NumChildren); break; } case DECL_OMP_ALLOCATE: { unsigned NumClauses = Record.readInt(); unsigned NumVars = Record.readInt(); Record.skipInts(1); D = OMPAllocateDecl::CreateDeserialized(Context, ID, NumVars, NumClauses); break; } case DECL_OMP_REQUIRES: { unsigned NumClauses = Record.readInt(); Record.skipInts(2); D = OMPRequiresDecl::CreateDeserialized(Context, ID, NumClauses); break; } case DECL_OMP_DECLARE_REDUCTION: D = OMPDeclareReductionDecl::CreateDeserialized(Context, ID); break; case DECL_OMP_DECLARE_MAPPER: { unsigned NumClauses = Record.readInt(); Record.skipInts(2); D = OMPDeclareMapperDecl::CreateDeserialized(Context, ID, NumClauses); break; } case DECL_OMP_CAPTUREDEXPR: D = OMPCapturedExprDecl::CreateDeserialized(Context, ID); break; case DECL_PRAGMA_COMMENT: D = PragmaCommentDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_PRAGMA_DETECT_MISMATCH: D = PragmaDetectMismatchDecl::CreateDeserialized(Context, ID, Record.readInt()); break; case DECL_EMPTY: D = EmptyDecl::CreateDeserialized(Context, ID); break; case DECL_LIFETIME_EXTENDED_TEMPORARY: D = LifetimeExtendedTemporaryDecl::CreateDeserialized(Context, ID); break; case DECL_OBJC_TYPE_PARAM: D = ObjCTypeParamDecl::CreateDeserialized(Context, ID); break; case DECL_HLSL_BUFFER: D = HLSLBufferDecl::CreateDeserialized(Context, ID); break; case DECL_IMPLICIT_CONCEPT_SPECIALIZATION: D = ImplicitConceptSpecializationDecl::CreateDeserialized(Context, ID, Record.readInt()); break; } assert(D && "Unknown declaration reading AST file"); LoadedDecl(translateGlobalDeclIDToIndex(ID), D); // Set the DeclContext before doing any deserialization, to make sure internal // calls to Decl::getASTContext() by Decl's methods will find the // TranslationUnitDecl without crashing. D->setDeclContext(Context.getTranslationUnitDecl()); // Reading some declarations can result in deep recursion. clang::runWithSufficientStackSpace([&] { warnStackExhausted(DeclLoc); }, [&] { Reader.Visit(D); }); // If this declaration is also a declaration context, get the // offsets for its tables of lexical and visible declarations. if (auto *DC = dyn_cast(D)) { std::pair Offsets = Reader.VisitDeclContext(DC); // Get the lexical and visible block for the delayed namespace. // It is sufficient to judge if ID is in DelayedNamespaceOffsetMap. 
// But it may be more efficient to filter the other cases. if (!Offsets.first && !Offsets.second && isa(D)) if (auto Iter = DelayedNamespaceOffsetMap.find(ID); Iter != DelayedNamespaceOffsetMap.end()) Offsets = Iter->second; if (Offsets.first && ReadLexicalDeclContextStorage(*Loc.F, DeclsCursor, Offsets.first, DC)) return nullptr; if (Offsets.second && ReadVisibleDeclContextStorage(*Loc.F, DeclsCursor, Offsets.second, ID)) return nullptr; } assert(Record.getIdx() == Record.size()); // Load any relevant update records. PendingUpdateRecords.push_back( PendingUpdateRecord(ID, D, /*JustLoaded=*/true)); // Load the categories after recursive loading is finished. if (auto *Class = dyn_cast(D)) // If we already have a definition when deserializing the ObjCInterfaceDecl, // we put the Decl in PendingDefinitions so we can pull the categories here. if (Class->isThisDeclarationADefinition() || PendingDefinitions.count(Class)) loadObjCCategories(ID, Class); // If we have deserialized a declaration that has a definition the // AST consumer might need to know about, queue it. // We don't pass it to the consumer immediately because we may be in recursive // loading, and some declarations may still be initializing. PotentiallyInterestingDecls.push_back(D); return D; } void ASTReader::PassInterestingDeclsToConsumer() { assert(Consumer); if (PassingDeclsToConsumer) return; // Guard variable to avoid recursively redoing the process of passing // decls to consumer. SaveAndRestore GuardPassingDeclsToConsumer(PassingDeclsToConsumer, true); // Ensure that we've loaded all potentially-interesting declarations // that need to be eagerly loaded. for (auto ID : EagerlyDeserializedDecls) GetDecl(ID); EagerlyDeserializedDecls.clear(); auto ConsumingPotentialInterestingDecls = [this]() { while (!PotentiallyInterestingDecls.empty()) { Decl *D = PotentiallyInterestingDecls.front(); PotentiallyInterestingDecls.pop_front(); if (isConsumerInterestedIn(D)) PassInterestingDeclToConsumer(D); } }; std::deque MaybeInterestingDecls = std::move(PotentiallyInterestingDecls); PotentiallyInterestingDecls.clear(); assert(PotentiallyInterestingDecls.empty()); while (!MaybeInterestingDecls.empty()) { Decl *D = MaybeInterestingDecls.front(); MaybeInterestingDecls.pop_front(); // Since we load the variable's initializers lazily, it'd be problematic // if the initializers dependent on each other. So here we try to load the // initializers of static variables to make sure they are passed to code // generator by order. If we read anything interesting, we would consume // that before emitting the current declaration. if (auto *VD = dyn_cast(D); VD && VD->isFileVarDecl() && !VD->isExternallyVisible()) VD->getInit(); ConsumingPotentialInterestingDecls(); if (isConsumerInterestedIn(D)) PassInterestingDeclToConsumer(D); } // If we add any new potential interesting decl in the last call, consume it. ConsumingPotentialInterestingDecls(); for (GlobalDeclID ID : VTablesToEmit) { auto *RD = cast(GetDecl(ID)); assert(!RD->shouldEmitInExternalSource()); PassVTableToConsumer(RD); } VTablesToEmit.clear(); } void ASTReader::loadDeclUpdateRecords(PendingUpdateRecord &Record) { // The declaration may have been modified by files later in the chain. // If this is the case, read the record containing the updates from each file // and pass it to ASTDeclReader to make the modifications. 
GlobalDeclID ID = Record.ID; Decl *D = Record.D; ProcessingUpdatesRAIIObj ProcessingUpdates(*this); DeclUpdateOffsetsMap::iterator UpdI = DeclUpdateOffsets.find(ID); SmallVector PendingLazySpecializationIDs; if (UpdI != DeclUpdateOffsets.end()) { auto UpdateOffsets = std::move(UpdI->second); DeclUpdateOffsets.erase(UpdI); // Check if this decl was interesting to the consumer. If we just loaded // the declaration, then we know it was interesting and we skip the call // to isConsumerInterestedIn because it is unsafe to call in the // current ASTReader state. bool WasInteresting = Record.JustLoaded || isConsumerInterestedIn(D); for (auto &FileAndOffset : UpdateOffsets) { ModuleFile *F = FileAndOffset.first; uint64_t Offset = FileAndOffset.second; llvm::BitstreamCursor &Cursor = F->DeclsCursor; SavedStreamPosition SavedPosition(Cursor); if (llvm::Error JumpFailed = Cursor.JumpToBit(Offset)) // FIXME don't do a fatal error. llvm::report_fatal_error( Twine("ASTReader::loadDeclUpdateRecords failed jumping: ") + toString(std::move(JumpFailed))); Expected MaybeCode = Cursor.ReadCode(); if (!MaybeCode) llvm::report_fatal_error( Twine("ASTReader::loadDeclUpdateRecords failed reading code: ") + toString(MaybeCode.takeError())); unsigned Code = MaybeCode.get(); ASTRecordReader Record(*this, *F); if (Expected MaybeRecCode = Record.readRecord(Cursor, Code)) assert(MaybeRecCode.get() == DECL_UPDATES && "Expected DECL_UPDATES record!"); else llvm::report_fatal_error( Twine("ASTReader::loadDeclUpdateRecords failed reading rec code: ") + toString(MaybeCode.takeError())); ASTDeclReader Reader(*this, Record, RecordLocation(F, Offset), ID, SourceLocation()); Reader.UpdateDecl(D, PendingLazySpecializationIDs); // We might have made this declaration interesting. If so, remember that // we need to hand it off to the consumer. if (!WasInteresting && isConsumerInterestedIn(D)) { PotentiallyInterestingDecls.push_back(D); WasInteresting = true; } } } // Add the lazy specializations to the template. assert((PendingLazySpecializationIDs.empty() || isa(D) || isa(D)) && "Must not have pending specializations"); if (auto *CTD = dyn_cast(D)) ASTDeclReader::AddLazySpecializations(CTD, PendingLazySpecializationIDs); else if (auto *FTD = dyn_cast(D)) ASTDeclReader::AddLazySpecializations(FTD, PendingLazySpecializationIDs); else if (auto *VTD = dyn_cast(D)) ASTDeclReader::AddLazySpecializations(VTD, PendingLazySpecializationIDs); PendingLazySpecializationIDs.clear(); // Load the pending visible updates for this decl context, if it has any. auto I = PendingVisibleUpdates.find(ID); if (I != PendingVisibleUpdates.end()) { auto VisibleUpdates = std::move(I->second); PendingVisibleUpdates.erase(I); auto *DC = cast(D)->getPrimaryContext(); for (const auto &Update : VisibleUpdates) Lookups[DC].Table.add( Update.Mod, Update.Data, reader::ASTDeclContextNameLookupTrait(*this, *Update.Mod)); DC->setHasExternalVisibleStorage(true); } } void ASTReader::loadPendingDeclChain(Decl *FirstLocal, uint64_t LocalOffset) { // Attach FirstLocal to the end of the decl chain. Decl *CanonDecl = FirstLocal->getCanonicalDecl(); if (FirstLocal != CanonDecl) { Decl *PrevMostRecent = ASTDeclReader::getMostRecentDecl(CanonDecl); ASTDeclReader::attachPreviousDecl( *this, FirstLocal, PrevMostRecent ? PrevMostRecent : CanonDecl, CanonDecl); } if (!LocalOffset) { ASTDeclReader::attachLatestDecl(CanonDecl, FirstLocal); return; } // Load the list of other redeclarations from this module file. 
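  // The LOCAL_REDECLARATIONS record below is walked from back to front, so
  // the chain is attached starting from the most recent declaration down to
  // the canonical one.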
ModuleFile *M = getOwningModuleFile(FirstLocal); assert(M && "imported decl from no module file"); llvm::BitstreamCursor &Cursor = M->DeclsCursor; SavedStreamPosition SavedPosition(Cursor); if (llvm::Error JumpFailed = Cursor.JumpToBit(LocalOffset)) llvm::report_fatal_error( Twine("ASTReader::loadPendingDeclChain failed jumping: ") + toString(std::move(JumpFailed))); RecordData Record; Expected MaybeCode = Cursor.ReadCode(); if (!MaybeCode) llvm::report_fatal_error( Twine("ASTReader::loadPendingDeclChain failed reading code: ") + toString(MaybeCode.takeError())); unsigned Code = MaybeCode.get(); if (Expected MaybeRecCode = Cursor.readRecord(Code, Record)) assert(MaybeRecCode.get() == LOCAL_REDECLARATIONS && "expected LOCAL_REDECLARATIONS record!"); else llvm::report_fatal_error( Twine("ASTReader::loadPendingDeclChain failed reading rec code: ") + toString(MaybeCode.takeError())); // FIXME: We have several different dispatches on decl kind here; maybe // we should instead generate one loop per kind and dispatch up-front? Decl *MostRecent = FirstLocal; for (unsigned I = 0, N = Record.size(); I != N; ++I) { unsigned Idx = N - I - 1; auto *D = ReadDecl(*M, Record, Idx); ASTDeclReader::attachPreviousDecl(*this, D, MostRecent, CanonDecl); MostRecent = D; } ASTDeclReader::attachLatestDecl(CanonDecl, MostRecent); } namespace { /// Given an ObjC interface, goes through the modules and links to the /// interface all the categories for it. class ObjCCategoriesVisitor { ASTReader &Reader; ObjCInterfaceDecl *Interface; llvm::SmallPtrSetImpl &Deserialized; ObjCCategoryDecl *Tail = nullptr; llvm::DenseMap NameCategoryMap; GlobalDeclID InterfaceID; unsigned PreviousGeneration; void add(ObjCCategoryDecl *Cat) { // Only process each category once. if (!Deserialized.erase(Cat)) return; // Check for duplicate categories. if (Cat->getDeclName()) { ObjCCategoryDecl *&Existing = NameCategoryMap[Cat->getDeclName()]; if (Existing && Reader.getOwningModuleFile(Existing) != Reader.getOwningModuleFile(Cat)) { llvm::DenseSet> NonEquivalentDecls; StructuralEquivalenceContext Ctx( Cat->getASTContext(), Existing->getASTContext(), NonEquivalentDecls, StructuralEquivalenceKind::Default, /*StrictTypeSpelling =*/false, /*Complain =*/false, /*ErrorOnTagTypeMismatch =*/true); if (!Ctx.IsEquivalent(Cat, Existing)) { // Warn only if the categories with the same name are different. Reader.Diag(Cat->getLocation(), diag::warn_dup_category_def) << Interface->getDeclName() << Cat->getDeclName(); Reader.Diag(Existing->getLocation(), diag::note_previous_definition); } } else if (!Existing) { // Record this category. Existing = Cat; } } // Add this category to the end of the chain. if (Tail) ASTDeclReader::setNextObjCCategory(Tail, Cat); else Interface->setCategoryListRaw(Cat); Tail = Cat; } public: ObjCCategoriesVisitor( ASTReader &Reader, ObjCInterfaceDecl *Interface, llvm::SmallPtrSetImpl &Deserialized, GlobalDeclID InterfaceID, unsigned PreviousGeneration) : Reader(Reader), Interface(Interface), Deserialized(Deserialized), InterfaceID(InterfaceID), PreviousGeneration(PreviousGeneration) { // Populate the name -> category map with the set of known categories. for (auto *Cat : Interface->known_categories()) { if (Cat->getDeclName()) NameCategoryMap[Cat->getDeclName()] = Cat; // Keep track of the tail of the category list. Tail = Cat; } } bool operator()(ModuleFile &M) { // If we've loaded all of the category information we care about from // this module file, we're done. 
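    // A module file whose generation is not newer than PreviousGeneration was
    // already visited the last time categories were loaded for this interface.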
if (M.Generation <= PreviousGeneration) return true; // Map global ID of the definition down to the local ID used in this // module file. If there is no such mapping, we'll find nothing here // (or in any module it imports). LocalDeclID LocalID = Reader.mapGlobalIDToModuleFileGlobalID(M, InterfaceID); if (LocalID.isInvalid()) return true; // Perform a binary search to find the local redeclarations for this // declaration (if any). const ObjCCategoriesInfo Compare = { LocalID, 0 }; const ObjCCategoriesInfo *Result = std::lower_bound(M.ObjCCategoriesMap, M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap, Compare); if (Result == M.ObjCCategoriesMap + M.LocalNumObjCCategoriesInMap || LocalID != Result->getDefinitionID()) { // We didn't find anything. If the class definition is in this module // file, then the module files it depends on cannot have any categories, // so suppress further lookup. return Reader.isDeclIDFromModule(InterfaceID, M); } // We found something. Dig out all of the categories. unsigned Offset = Result->Offset; unsigned N = M.ObjCCategories[Offset]; M.ObjCCategories[Offset++] = 0; // Don't try to deserialize again for (unsigned I = 0; I != N; ++I) add(Reader.ReadDeclAs(M, M.ObjCCategories, Offset)); return true; } }; } // namespace void ASTReader::loadObjCCategories(GlobalDeclID ID, ObjCInterfaceDecl *D, unsigned PreviousGeneration) { ObjCCategoriesVisitor Visitor(*this, D, CategoriesDeserialized, ID, PreviousGeneration); ModuleMgr.visit(Visitor); } template static void forAllLaterRedecls(DeclT *D, Fn F) { F(D); // Check whether we've already merged D into its redeclaration chain. // MostRecent may or may not be nullptr if D has not been merged. If // not, walk the merged redecl chain and see if it's there. auto *MostRecent = D->getMostRecentDecl(); bool Found = false; for (auto *Redecl = MostRecent; Redecl && !Found; Redecl = Redecl->getPreviousDecl()) Found = (Redecl == D); // If this declaration is merged, apply the functor to all later decls. if (Found) { for (auto *Redecl = MostRecent; Redecl != D; Redecl = Redecl->getPreviousDecl()) F(Redecl); } } void ASTDeclReader::UpdateDecl( Decl *D, llvm::SmallVectorImpl &PendingLazySpecializationIDs) { while (Record.getIdx() < Record.size()) { switch ((DeclUpdateKind)Record.readInt()) { case UPD_CXX_ADDED_IMPLICIT_MEMBER: { auto *RD = cast(D); Decl *MD = Record.readDecl(); assert(MD && "couldn't read decl from update record"); Reader.PendingAddedClassMembers.push_back({RD, MD}); break; } case UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION: // It will be added to the template's lazy specialization set. PendingLazySpecializationIDs.push_back(readDeclID()); break; case UPD_CXX_ADDED_ANONYMOUS_NAMESPACE: { auto *Anon = readDeclAs(); // Each module has its own anonymous namespace, which is disjoint from // any other module's anonymous namespaces, so don't attach the anonymous // namespace at all. 
if (!Record.isModule()) { if (auto *TU = dyn_cast(D)) TU->setAnonymousNamespace(Anon); else cast(D)->setAnonymousNamespace(Anon); } break; } case UPD_CXX_ADDED_VAR_DEFINITION: { auto *VD = cast(D); VD->NonParmVarDeclBits.IsInline = Record.readInt(); VD->NonParmVarDeclBits.IsInlineSpecified = Record.readInt(); ReadVarDeclInit(VD); break; } case UPD_CXX_POINT_OF_INSTANTIATION: { SourceLocation POI = Record.readSourceLocation(); if (auto *VTSD = dyn_cast(D)) { VTSD->setPointOfInstantiation(POI); } else if (auto *VD = dyn_cast(D)) { MemberSpecializationInfo *MSInfo = VD->getMemberSpecializationInfo(); assert(MSInfo && "No member specialization information"); MSInfo->setPointOfInstantiation(POI); } else { auto *FD = cast(D); if (auto *FTSInfo = FD->TemplateOrSpecialization .dyn_cast()) FTSInfo->setPointOfInstantiation(POI); else FD->TemplateOrSpecialization.get() ->setPointOfInstantiation(POI); } break; } case UPD_CXX_INSTANTIATED_DEFAULT_ARGUMENT: { auto *Param = cast(D); // We have to read the default argument regardless of whether we use it // so that hypothetical further update records aren't messed up. // TODO: Add a function to skip over the next expr record. auto *DefaultArg = Record.readExpr(); // Only apply the update if the parameter still has an uninstantiated // default argument. if (Param->hasUninstantiatedDefaultArg()) Param->setDefaultArg(DefaultArg); break; } case UPD_CXX_INSTANTIATED_DEFAULT_MEMBER_INITIALIZER: { auto *FD = cast(D); auto *DefaultInit = Record.readExpr(); // Only apply the update if the field still has an uninstantiated // default member initializer. if (FD->hasInClassInitializer() && !FD->hasNonNullInClassInitializer()) { if (DefaultInit) FD->setInClassInitializer(DefaultInit); else // Instantiation failed. We can get here if we serialized an AST for // an invalid program. FD->removeInClassInitializer(); } break; } case UPD_CXX_ADDED_FUNCTION_DEFINITION: { auto *FD = cast(D); if (Reader.PendingBodies[FD]) { // FIXME: Maybe check for ODR violations. // It's safe to stop now because this update record is always last. return; } if (Record.readInt()) { // Maintain AST consistency: any later redeclarations of this function // are inline if this one is. (We might have merged another declaration // into this one.) forAllLaterRedecls(FD, [](FunctionDecl *FD) { FD->setImplicitlyInline(); }); } FD->setInnerLocStart(readSourceLocation()); ReadFunctionDefinition(FD); assert(Record.getIdx() == Record.size() && "lazy body must be last"); break; } case UPD_CXX_INSTANTIATED_CLASS_DEFINITION: { auto *RD = cast(D); auto *OldDD = RD->getCanonicalDecl()->DefinitionData; bool HadRealDefinition = OldDD && (OldDD->Definition != RD || !Reader.PendingFakeDefinitionData.count(OldDD)); RD->setParamDestroyedInCallee(Record.readInt()); RD->setArgPassingRestrictions( static_cast(Record.readInt())); ReadCXXRecordDefinition(RD, /*Update*/true); // Visible update is handled separately. 
uint64_t LexicalOffset = ReadLocalOffset(); if (!HadRealDefinition && LexicalOffset) { Record.readLexicalDeclContextStorage(LexicalOffset, RD); Reader.PendingFakeDefinitionData.erase(OldDD); } auto TSK = (TemplateSpecializationKind)Record.readInt(); SourceLocation POI = readSourceLocation(); if (MemberSpecializationInfo *MSInfo = RD->getMemberSpecializationInfo()) { MSInfo->setTemplateSpecializationKind(TSK); MSInfo->setPointOfInstantiation(POI); } else { auto *Spec = cast(RD); Spec->setTemplateSpecializationKind(TSK); Spec->setPointOfInstantiation(POI); if (Record.readInt()) { auto *PartialSpec = readDeclAs(); SmallVector TemplArgs; Record.readTemplateArgumentList(TemplArgs); auto *TemplArgList = TemplateArgumentList::CreateCopy( Reader.getContext(), TemplArgs); // FIXME: If we already have a partial specialization set, // check that it matches. if (!Spec->getSpecializedTemplateOrPartial() .is()) Spec->setInstantiationOf(PartialSpec, TemplArgList); } } RD->setTagKind(static_cast(Record.readInt())); RD->setLocation(readSourceLocation()); RD->setLocStart(readSourceLocation()); RD->setBraceRange(readSourceRange()); if (Record.readInt()) { AttrVec Attrs; Record.readAttributes(Attrs); // If the declaration already has attributes, we assume that some other // AST file already loaded them. if (!D->hasAttrs()) D->setAttrsImpl(Attrs, Reader.getContext()); } break; } case UPD_CXX_RESOLVED_DTOR_DELETE: { // Set the 'operator delete' directly to avoid emitting another update // record. auto *Del = readDeclAs(); auto *First = cast(D->getCanonicalDecl()); auto *ThisArg = Record.readExpr(); // FIXME: Check consistency if we have an old and new operator delete. if (!First->OperatorDelete) { First->OperatorDelete = Del; First->OperatorDeleteThisArg = ThisArg; } break; } case UPD_CXX_RESOLVED_EXCEPTION_SPEC: { SmallVector ExceptionStorage; auto ESI = Record.readExceptionSpecInfo(ExceptionStorage); // Update this declaration's exception specification, if needed. auto *FD = cast(D); auto *FPT = FD->getType()->castAs(); // FIXME: If the exception specification is already present, check that it // matches. if (isUnresolvedExceptionSpec(FPT->getExceptionSpecType())) { FD->setType(Reader.getContext().getFunctionType( FPT->getReturnType(), FPT->getParamTypes(), FPT->getExtProtoInfo().withExceptionSpec(ESI))); // When we get to the end of deserializing, see if there are other decls // that we need to propagate this exception specification onto. Reader.PendingExceptionSpecUpdates.insert( std::make_pair(FD->getCanonicalDecl(), FD)); } break; } case UPD_CXX_DEDUCED_RETURN_TYPE: { auto *FD = cast(D); QualType DeducedResultType = Record.readType(); Reader.PendingDeducedTypeUpdates.insert( {FD->getCanonicalDecl(), DeducedResultType}); break; } case UPD_DECL_MARKED_USED: // Maintain AST consistency: any later redeclarations are used too. 
D->markUsed(Reader.getContext()); break; case UPD_MANGLING_NUMBER: Reader.getContext().setManglingNumber(cast(D), Record.readInt()); break; case UPD_STATIC_LOCAL_NUMBER: Reader.getContext().setStaticLocalNumber(cast(D), Record.readInt()); break; case UPD_DECL_MARKED_OPENMP_THREADPRIVATE: D->addAttr(OMPThreadPrivateDeclAttr::CreateImplicit(Reader.getContext(), readSourceRange())); break; case UPD_DECL_MARKED_OPENMP_ALLOCATE: { auto AllocatorKind = static_cast(Record.readInt()); Expr *Allocator = Record.readExpr(); Expr *Alignment = Record.readExpr(); SourceRange SR = readSourceRange(); D->addAttr(OMPAllocateDeclAttr::CreateImplicit( Reader.getContext(), AllocatorKind, Allocator, Alignment, SR)); break; } case UPD_DECL_EXPORTED: { unsigned SubmoduleID = readSubmoduleID(); auto *Exported = cast(D); Module *Owner = SubmoduleID ? Reader.getSubmodule(SubmoduleID) : nullptr; Reader.getContext().mergeDefinitionIntoModule(Exported, Owner); Reader.PendingMergedDefinitionsToDeduplicate.insert(Exported); break; } case UPD_DECL_MARKED_OPENMP_DECLARETARGET: { auto MapType = Record.readEnum(); auto DevType = Record.readEnum(); Expr *IndirectE = Record.readExpr(); bool Indirect = Record.readBool(); unsigned Level = Record.readInt(); D->addAttr(OMPDeclareTargetDeclAttr::CreateImplicit( Reader.getContext(), MapType, DevType, IndirectE, Indirect, Level, readSourceRange())); break; } case UPD_ADDED_ATTR_TO_RECORD: AttrVec Attrs; Record.readAttributes(Attrs); assert(Attrs.size() == 1); D->addAttr(Attrs[0]); break; } } } diff --git a/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp b/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp index 8a4ca54349e3..ff1334340874 100644 --- a/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/contrib/llvm-project/clang/lib/Serialization/ASTWriterDecl.cpp @@ -1,2890 +1,2891 @@ //===--- ASTWriterDecl.cpp - Declaration Serialization --------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements serialization for Declarations. 
// //===----------------------------------------------------------------------===// #include "ASTCommon.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/DeclVisitor.h" #include "clang/AST/Expr.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/PrettyDeclStackTrace.h" #include "clang/Basic/SourceManager.h" #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ASTRecordWriter.h" #include "llvm/Bitstream/BitstreamWriter.h" #include "llvm/Support/ErrorHandling.h" #include using namespace clang; using namespace serialization; //===----------------------------------------------------------------------===// // Declaration serialization //===----------------------------------------------------------------------===// namespace clang { class ASTDeclWriter : public DeclVisitor { ASTWriter &Writer; ASTContext &Context; ASTRecordWriter Record; serialization::DeclCode Code; unsigned AbbrevToUse; bool GeneratingReducedBMI = false; public: ASTDeclWriter(ASTWriter &Writer, ASTContext &Context, ASTWriter::RecordDataImpl &Record, bool GeneratingReducedBMI) : Writer(Writer), Context(Context), Record(Writer, Record), Code((serialization::DeclCode)0), AbbrevToUse(0), GeneratingReducedBMI(GeneratingReducedBMI) {} uint64_t Emit(Decl *D) { if (!Code) llvm::report_fatal_error(StringRef("unexpected declaration kind '") + D->getDeclKindName() + "'"); return Record.Emit(Code, AbbrevToUse); } void Visit(Decl *D); void VisitDecl(Decl *D); void VisitPragmaCommentDecl(PragmaCommentDecl *D); void VisitPragmaDetectMismatchDecl(PragmaDetectMismatchDecl *D); void VisitTranslationUnitDecl(TranslationUnitDecl *D); void VisitNamedDecl(NamedDecl *D); void VisitLabelDecl(LabelDecl *LD); void VisitNamespaceDecl(NamespaceDecl *D); void VisitUsingDirectiveDecl(UsingDirectiveDecl *D); void VisitNamespaceAliasDecl(NamespaceAliasDecl *D); void VisitTypeDecl(TypeDecl *D); void VisitTypedefNameDecl(TypedefNameDecl *D); void VisitTypedefDecl(TypedefDecl *D); void VisitTypeAliasDecl(TypeAliasDecl *D); void VisitUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *D); void VisitUnresolvedUsingIfExistsDecl(UnresolvedUsingIfExistsDecl *D); void VisitTagDecl(TagDecl *D); void VisitEnumDecl(EnumDecl *D); void VisitRecordDecl(RecordDecl *D); void VisitCXXRecordDecl(CXXRecordDecl *D); void VisitClassTemplateSpecializationDecl( ClassTemplateSpecializationDecl *D); void VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D); void VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *D); void VisitVarTemplatePartialSpecializationDecl( VarTemplatePartialSpecializationDecl *D); void VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D); void VisitValueDecl(ValueDecl *D); void VisitEnumConstantDecl(EnumConstantDecl *D); void VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D); void VisitDeclaratorDecl(DeclaratorDecl *D); void VisitFunctionDecl(FunctionDecl *D); void VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D); void VisitCXXMethodDecl(CXXMethodDecl *D); void VisitCXXConstructorDecl(CXXConstructorDecl *D); void VisitCXXDestructorDecl(CXXDestructorDecl *D); void VisitCXXConversionDecl(CXXConversionDecl *D); void VisitFieldDecl(FieldDecl *D); void VisitMSPropertyDecl(MSPropertyDecl *D); void VisitMSGuidDecl(MSGuidDecl *D); void VisitUnnamedGlobalConstantDecl(UnnamedGlobalConstantDecl *D); void VisitTemplateParamObjectDecl(TemplateParamObjectDecl *D); void VisitIndirectFieldDecl(IndirectFieldDecl *D); 
void VisitVarDecl(VarDecl *D); void VisitImplicitParamDecl(ImplicitParamDecl *D); void VisitParmVarDecl(ParmVarDecl *D); void VisitDecompositionDecl(DecompositionDecl *D); void VisitBindingDecl(BindingDecl *D); void VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D); void VisitTemplateDecl(TemplateDecl *D); void VisitConceptDecl(ConceptDecl *D); void VisitImplicitConceptSpecializationDecl( ImplicitConceptSpecializationDecl *D); void VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D); void VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D); void VisitClassTemplateDecl(ClassTemplateDecl *D); void VisitVarTemplateDecl(VarTemplateDecl *D); void VisitFunctionTemplateDecl(FunctionTemplateDecl *D); void VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D); void VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D); void VisitUsingDecl(UsingDecl *D); void VisitUsingEnumDecl(UsingEnumDecl *D); void VisitUsingPackDecl(UsingPackDecl *D); void VisitUsingShadowDecl(UsingShadowDecl *D); void VisitConstructorUsingShadowDecl(ConstructorUsingShadowDecl *D); void VisitLinkageSpecDecl(LinkageSpecDecl *D); void VisitExportDecl(ExportDecl *D); void VisitFileScopeAsmDecl(FileScopeAsmDecl *D); void VisitTopLevelStmtDecl(TopLevelStmtDecl *D); void VisitImportDecl(ImportDecl *D); void VisitAccessSpecDecl(AccessSpecDecl *D); void VisitFriendDecl(FriendDecl *D); void VisitFriendTemplateDecl(FriendTemplateDecl *D); void VisitStaticAssertDecl(StaticAssertDecl *D); void VisitBlockDecl(BlockDecl *D); void VisitCapturedDecl(CapturedDecl *D); void VisitEmptyDecl(EmptyDecl *D); void VisitLifetimeExtendedTemporaryDecl(LifetimeExtendedTemporaryDecl *D); void VisitDeclContext(DeclContext *DC); template void VisitRedeclarable(Redeclarable *D); void VisitHLSLBufferDecl(HLSLBufferDecl *D); // FIXME: Put in the same order is DeclNodes.td? void VisitObjCMethodDecl(ObjCMethodDecl *D); void VisitObjCTypeParamDecl(ObjCTypeParamDecl *D); void VisitObjCContainerDecl(ObjCContainerDecl *D); void VisitObjCInterfaceDecl(ObjCInterfaceDecl *D); void VisitObjCIvarDecl(ObjCIvarDecl *D); void VisitObjCProtocolDecl(ObjCProtocolDecl *D); void VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D); void VisitObjCCategoryDecl(ObjCCategoryDecl *D); void VisitObjCImplDecl(ObjCImplDecl *D); void VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D); void VisitObjCImplementationDecl(ObjCImplementationDecl *D); void VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *D); void VisitObjCPropertyDecl(ObjCPropertyDecl *D); void VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D); void VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D); void VisitOMPAllocateDecl(OMPAllocateDecl *D); void VisitOMPRequiresDecl(OMPRequiresDecl *D); void VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D); void VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D); void VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D); /// Add an Objective-C type parameter list to the given record. void AddObjCTypeParamList(ObjCTypeParamList *typeParams) { // Empty type parameter list. if (!typeParams) { Record.push_back(0); return; } Record.push_back(typeParams->size()); for (auto *typeParam : *typeParams) { Record.AddDeclRef(typeParam); } Record.AddSourceLocation(typeParams->getLAngleLoc()); Record.AddSourceLocation(typeParams->getRAngleLoc()); } /// Add to the record the first declaration from each module file that /// provides a declaration of D. The intent is to provide a sufficient /// set such that reloading this set will load all current redeclarations. 
void AddFirstDeclFromEachModule(const Decl *D, bool IncludeLocal) { llvm::MapVector Firsts; // FIXME: We can skip entries that we know are implied by others. for (const Decl *R = D->getMostRecentDecl(); R; R = R->getPreviousDecl()) { if (R->isFromASTFile()) Firsts[Writer.Chain->getOwningModuleFile(R)] = R; else if (IncludeLocal) Firsts[nullptr] = R; } for (const auto &F : Firsts) Record.AddDeclRef(F.second); } /// Get the specialization decl from an entry in the specialization list. template typename RedeclarableTemplateDecl::SpecEntryTraits::DeclType * getSpecializationDecl(EntryType &T) { return RedeclarableTemplateDecl::SpecEntryTraits::getDecl(&T); } /// Get the list of partial specializations from a template's common ptr. template decltype(T::PartialSpecializations) &getPartialSpecializations(T *Common) { return Common->PartialSpecializations; } ArrayRef getPartialSpecializations(FunctionTemplateDecl::Common *) { return std::nullopt; } template void AddTemplateSpecializations(DeclTy *D) { auto *Common = D->getCommonPtr(); // If we have any lazy specializations, and the external AST source is // our chained AST reader, we can just write out the DeclIDs. Otherwise, // we need to resolve them to actual declarations. if (Writer.Chain != Writer.Context->getExternalSource() && Common->LazySpecializations) { D->LoadLazySpecializations(); assert(!Common->LazySpecializations); } ArrayRef LazySpecializations; if (auto *LS = Common->LazySpecializations) LazySpecializations = llvm::ArrayRef(LS + 1, LS[0].getRawValue()); // Add a slot to the record for the number of specializations. unsigned I = Record.size(); Record.push_back(0); // AddFirstDeclFromEachModule might trigger deserialization, invalidating // *Specializations iterators. llvm::SmallVector Specs; for (auto &Entry : Common->Specializations) Specs.push_back(getSpecializationDecl(Entry)); for (auto &Entry : getPartialSpecializations(Common)) Specs.push_back(getSpecializationDecl(Entry)); for (auto *D : Specs) { assert(D->isCanonicalDecl() && "non-canonical decl in set"); AddFirstDeclFromEachModule(D, /*IncludeLocal*/true); } Record.append( DeclIDIterator(LazySpecializations.begin()), DeclIDIterator(LazySpecializations.end())); // Update the size entry we added earlier. Record[I] = Record.size() - I - 1; } /// Ensure that this template specialization is associated with the specified /// template on reload. void RegisterTemplateSpecialization(const Decl *Template, const Decl *Specialization) { Template = Template->getCanonicalDecl(); // If the canonical template is local, we'll write out this specialization // when we emit it. // FIXME: We can do the same thing if there is any local declaration of // the template, to avoid emitting an update record. if (!Template->isFromASTFile()) return; // We only need to associate the first local declaration of the // specialization. The other declarations will get pulled in by it. 
    if (Writer.getFirstLocalDecl(Specialization) != Specialization)
      return;

    Writer.DeclUpdates[Template].push_back(ASTWriter::DeclUpdate(
        UPD_CXX_ADDED_TEMPLATE_SPECIALIZATION, Specialization));
  }
};

}

bool clang::CanElideDeclDef(const Decl *D) {
  if (auto *FD = dyn_cast<FunctionDecl>(D)) {
    if (FD->isInlined() || FD->isConstexpr())
      return false;

    if (FD->isDependentContext())
      return false;

    if (FD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
      return false;
  }

  if (auto *VD = dyn_cast<VarDecl>(D)) {
    if (!VD->getDeclContext()->getRedeclContext()->isFileContext() ||
        VD->isInline() || VD->isConstexpr() || isa<ParmVarDecl>(VD) ||
        // Constant initialized variables may not affect the ABI, but they
        // may be used in constant evaluation in the frontend, so we have
        // to retain them.
        VD->hasConstantInitialization())
      return false;

    if (VD->getTemplateSpecializationKind() == TSK_ImplicitInstantiation)
      return false;
  }

  return true;
}

void ASTDeclWriter::Visit(Decl *D) {
  DeclVisitor<ASTDeclWriter>::Visit(D);

  // Source locations require array (variable-length) abbreviations. The
  // abbreviation infrastructure requires that arrays are encoded last, so
  // we handle them here for the classes derived from DeclaratorDecl.
  if (auto *DD = dyn_cast<DeclaratorDecl>(D)) {
    if (auto *TInfo = DD->getTypeSourceInfo())
      Record.AddTypeLoc(TInfo->getTypeLoc());
  }

  // Handle FunctionDecl's body here and write it after all other Stmts/Exprs
  // have been written. We want it last because we will not read it back when
  // retrieving it from the AST, we'll just lazily set the offset.
  if (auto *FD = dyn_cast<FunctionDecl>(D)) {
    if (!GeneratingReducedBMI || !CanElideDeclDef(FD)) {
      Record.push_back(FD->doesThisDeclarationHaveABody());
      if (FD->doesThisDeclarationHaveABody())
        Record.AddFunctionDefinition(FD);
    } else
      Record.push_back(0);
  }

  // Similar to FunctionDecls, handle VarDecl's initializer here and write it
  // after all other Stmts/Exprs. We will not read the initializer until after
  // we have finished recursive deserialization, because it can recursively
  // refer back to the variable.
  if (auto *VD = dyn_cast<VarDecl>(D)) {
    if (!GeneratingReducedBMI || !CanElideDeclDef(VD))
      Record.AddVarDeclInit(VD);
    else
      Record.push_back(0);
  }

  // And similarly for FieldDecls. We already serialized whether there is a
  // default member initializer.
  if (auto *FD = dyn_cast<FieldDecl>(D)) {
    if (FD->hasInClassInitializer()) {
      if (Expr *Init = FD->getInClassInitializer()) {
        Record.push_back(1);
        Record.AddStmt(Init);
      } else {
        Record.push_back(0); // Initializer has not been instantiated yet.
      }
    }
  }

  // If this declaration is also a DeclContext, write blocks for the
  // declarations that are lexically stored inside its context and those
  // declarations that are visible from its context.
  if (auto *DC = dyn_cast<DeclContext>(D))
    VisitDeclContext(DC);
}

void ASTDeclWriter::VisitDecl(Decl *D) {
  BitsPacker DeclBits;

  // The order matters here. It is better to put the bits with a higher
  // probability of being 0 at the end, since we're using the VBR6 format to
  // store the packed value: it is quite efficient when all the higher bits
  // are 0. For example, if we need to pack 8 bits into a value and the stored
  // value is 0xf0, the actual stored value will be 0b000111'110000, which
  // takes 12 bits. However, if we change the order so the value becomes 0x0f,
  // then we can store it as 0b001111, which takes only 6 bits.
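  //
  // (Illustrative sketch, not from the original source: VBR6 emits a value in
  // 6-bit chunks, 5 payload bits plus one continuation bit each, so the cost
  // of a packed value is roughly:
  //
  //   unsigned VBR6Bits(uint64_t V) { // hypothetical helper, not in the tree
  //     unsigned Chunks = 1;
  //     while (V >>= 5)               // 5 payload bits per 6-bit chunk
  //       ++Chunks;
  //     return Chunks * 6;
  //   }
  //
  // VBR6Bits(0xf0) == 12 and VBR6Bits(0x0f) == 6, matching the example above;
  // keeping the usually-zero bits in the high positions keeps the chunk count
  // small.)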
DeclBits.addBits((uint64_t)D->getModuleOwnershipKind(), /*BitWidth=*/3); DeclBits.addBit(D->isReferenced()); DeclBits.addBit(D->isUsed(false)); DeclBits.addBits(D->getAccess(), /*BitWidth=*/2); DeclBits.addBit(D->isImplicit()); DeclBits.addBit(D->getDeclContext() != D->getLexicalDeclContext()); DeclBits.addBit(D->hasAttrs()); DeclBits.addBit(D->isTopLevelDeclInObjCContainer()); DeclBits.addBit(D->isInvalidDecl()); Record.push_back(DeclBits); Record.AddDeclRef(cast_or_null(D->getDeclContext())); if (D->getDeclContext() != D->getLexicalDeclContext()) Record.AddDeclRef(cast_or_null(D->getLexicalDeclContext())); if (D->hasAttrs()) Record.AddAttributes(D->getAttrs()); Record.push_back(Writer.getSubmoduleID(D->getOwningModule())); // If this declaration injected a name into a context different from its // lexical context, and that context is an imported namespace, we need to // update its visible declarations to include this name. // // This happens when we instantiate a class with a friend declaration or a // function with a local extern declaration, for instance. // // FIXME: Can we handle this in AddedVisibleDecl instead? if (D->isOutOfLine()) { auto *DC = D->getDeclContext(); while (auto *NS = dyn_cast(DC->getRedeclContext())) { if (!NS->isFromASTFile()) break; Writer.UpdatedDeclContexts.insert(NS->getPrimaryContext()); if (!NS->isInlineNamespace()) break; DC = NS->getParent(); } } } void ASTDeclWriter::VisitPragmaCommentDecl(PragmaCommentDecl *D) { StringRef Arg = D->getArg(); Record.push_back(Arg.size()); VisitDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Record.push_back(D->getCommentKind()); Record.AddString(Arg); Code = serialization::DECL_PRAGMA_COMMENT; } void ASTDeclWriter::VisitPragmaDetectMismatchDecl( PragmaDetectMismatchDecl *D) { StringRef Name = D->getName(); StringRef Value = D->getValue(); Record.push_back(Name.size() + 1 + Value.size()); VisitDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Record.AddString(Name); Record.AddString(Value); Code = serialization::DECL_PRAGMA_DETECT_MISMATCH; } void ASTDeclWriter::VisitTranslationUnitDecl(TranslationUnitDecl *D) { llvm_unreachable("Translation units aren't directly serialized"); } void ASTDeclWriter::VisitNamedDecl(NamedDecl *D) { VisitDecl(D); Record.AddDeclarationName(D->getDeclName()); Record.push_back(needsAnonymousDeclarationNumber(D) ? 
Writer.getAnonymousDeclarationNumber(D) : 0); } void ASTDeclWriter::VisitTypeDecl(TypeDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Record.AddTypeRef(QualType(D->getTypeForDecl(), 0)); } void ASTDeclWriter::VisitTypedefNameDecl(TypedefNameDecl *D) { VisitRedeclarable(D); VisitTypeDecl(D); Record.AddTypeSourceInfo(D->getTypeSourceInfo()); Record.push_back(D->isModed()); if (D->isModed()) Record.AddTypeRef(D->getUnderlyingType()); Record.AddDeclRef(D->getAnonDeclWithTypedefName(false)); } void ASTDeclWriter::VisitTypedefDecl(TypedefDecl *D) { VisitTypedefNameDecl(D); if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->isImplicit() && D->getFirstDecl() == D->getMostRecentDecl() && !D->isInvalidDecl() && !D->isTopLevelDeclInObjCContainer() && !D->isModulePrivate() && !needsAnonymousDeclarationNumber(D) && D->getDeclName().getNameKind() == DeclarationName::Identifier) AbbrevToUse = Writer.getDeclTypedefAbbrev(); Code = serialization::DECL_TYPEDEF; } void ASTDeclWriter::VisitTypeAliasDecl(TypeAliasDecl *D) { VisitTypedefNameDecl(D); Record.AddDeclRef(D->getDescribedAliasTemplate()); Code = serialization::DECL_TYPEALIAS; } void ASTDeclWriter::VisitTagDecl(TagDecl *D) { static_assert(DeclContext::NumTagDeclBits == 23, "You need to update the serializer after you change the " "TagDeclBits"); VisitRedeclarable(D); VisitTypeDecl(D); Record.push_back(D->getIdentifierNamespace()); BitsPacker TagDeclBits; TagDeclBits.addBits(llvm::to_underlying(D->getTagKind()), /*BitWidth=*/3); TagDeclBits.addBit(!isa(D) ? D->isCompleteDefinition() : 0); TagDeclBits.addBit(D->isEmbeddedInDeclarator()); TagDeclBits.addBit(D->isFreeStanding()); TagDeclBits.addBit(D->isCompleteDefinitionRequired()); TagDeclBits.addBits( D->hasExtInfo() ? 1 : (D->getTypedefNameForAnonDecl() ? 
2 : 0), /*BitWidth=*/2); Record.push_back(TagDeclBits); Record.AddSourceRange(D->getBraceRange()); if (D->hasExtInfo()) { Record.AddQualifierInfo(*D->getExtInfo()); } else if (auto *TD = D->getTypedefNameForAnonDecl()) { Record.AddDeclRef(TD); Record.AddIdentifierRef(TD->getDeclName().getAsIdentifierInfo()); } } void ASTDeclWriter::VisitEnumDecl(EnumDecl *D) { static_assert(DeclContext::NumEnumDeclBits == 43, "You need to update the serializer after you change the " "EnumDeclBits"); VisitTagDecl(D); Record.AddTypeSourceInfo(D->getIntegerTypeSourceInfo()); if (!D->getIntegerTypeSourceInfo()) Record.AddTypeRef(D->getIntegerType()); Record.AddTypeRef(D->getPromotionType()); BitsPacker EnumDeclBits; EnumDeclBits.addBits(D->getNumPositiveBits(), /*BitWidth=*/8); EnumDeclBits.addBits(D->getNumNegativeBits(), /*BitWidth=*/8); EnumDeclBits.addBit(D->isScoped()); EnumDeclBits.addBit(D->isScopedUsingClassTag()); EnumDeclBits.addBit(D->isFixed()); Record.push_back(EnumDeclBits); Record.push_back(D->getODRHash()); if (MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo()) { Record.AddDeclRef(MemberInfo->getInstantiatedFrom()); Record.push_back(MemberInfo->getTemplateSpecializationKind()); Record.AddSourceLocation(MemberInfo->getPointOfInstantiation()); } else { Record.AddDeclRef(nullptr); } if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->isInvalidDecl() && !D->isImplicit() && !D->hasExtInfo() && !D->getTypedefNameForAnonDecl() && D->getFirstDecl() == D->getMostRecentDecl() && !D->isTopLevelDeclInObjCContainer() && !CXXRecordDecl::classofKind(D->getKind()) && !D->getIntegerTypeSourceInfo() && !D->getMemberSpecializationInfo() && !needsAnonymousDeclarationNumber(D) && D->getDeclName().getNameKind() == DeclarationName::Identifier) AbbrevToUse = Writer.getDeclEnumAbbrev(); Code = serialization::DECL_ENUM; } void ASTDeclWriter::VisitRecordDecl(RecordDecl *D) { static_assert(DeclContext::NumRecordDeclBits == 64, "You need to update the serializer after you change the " "RecordDeclBits"); VisitTagDecl(D); BitsPacker RecordDeclBits; RecordDeclBits.addBit(D->hasFlexibleArrayMember()); RecordDeclBits.addBit(D->isAnonymousStructOrUnion()); RecordDeclBits.addBit(D->hasObjectMember()); RecordDeclBits.addBit(D->hasVolatileMember()); RecordDeclBits.addBit(D->isNonTrivialToPrimitiveDefaultInitialize()); RecordDeclBits.addBit(D->isNonTrivialToPrimitiveCopy()); RecordDeclBits.addBit(D->isNonTrivialToPrimitiveDestroy()); RecordDeclBits.addBit(D->hasNonTrivialToPrimitiveDefaultInitializeCUnion()); RecordDeclBits.addBit(D->hasNonTrivialToPrimitiveDestructCUnion()); RecordDeclBits.addBit(D->hasNonTrivialToPrimitiveCopyCUnion()); RecordDeclBits.addBit(D->isParamDestroyedInCallee()); RecordDeclBits.addBits(llvm::to_underlying(D->getArgPassingRestrictions()), 2); Record.push_back(RecordDeclBits); // Only compute this for C/Objective-C, in C++ this is computed as part // of CXXRecordDecl. 
if (!isa(D)) Record.push_back(D->getODRHash()); if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->isImplicit() && !D->isInvalidDecl() && !D->hasExtInfo() && !D->getTypedefNameForAnonDecl() && D->getFirstDecl() == D->getMostRecentDecl() && !D->isTopLevelDeclInObjCContainer() && !CXXRecordDecl::classofKind(D->getKind()) && !needsAnonymousDeclarationNumber(D) && D->getDeclName().getNameKind() == DeclarationName::Identifier) AbbrevToUse = Writer.getDeclRecordAbbrev(); Code = serialization::DECL_RECORD; } void ASTDeclWriter::VisitValueDecl(ValueDecl *D) { VisitNamedDecl(D); Record.AddTypeRef(D->getType()); } void ASTDeclWriter::VisitEnumConstantDecl(EnumConstantDecl *D) { VisitValueDecl(D); Record.push_back(D->getInitExpr()? 1 : 0); if (D->getInitExpr()) Record.AddStmt(D->getInitExpr()); Record.AddAPSInt(D->getInitVal()); Code = serialization::DECL_ENUM_CONSTANT; } void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) { VisitValueDecl(D); Record.AddSourceLocation(D->getInnerLocStart()); Record.push_back(D->hasExtInfo()); if (D->hasExtInfo()) { DeclaratorDecl::ExtInfo *Info = D->getExtInfo(); Record.AddQualifierInfo(*Info); Record.AddStmt(Info->TrailingRequiresClause); } // The location information is deferred until the end of the record. Record.AddTypeRef(D->getTypeSourceInfo() ? D->getTypeSourceInfo()->getType() : QualType()); } void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { static_assert(DeclContext::NumFunctionDeclBits == 44, "You need to update the serializer after you change the " "FunctionDeclBits"); VisitRedeclarable(D); Record.push_back(D->getTemplatedKind()); switch (D->getTemplatedKind()) { case FunctionDecl::TK_NonTemplate: break; case FunctionDecl::TK_DependentNonTemplate: Record.AddDeclRef(D->getInstantiatedFromDecl()); break; case FunctionDecl::TK_FunctionTemplate: Record.AddDeclRef(D->getDescribedFunctionTemplate()); break; case FunctionDecl::TK_MemberSpecialization: { MemberSpecializationInfo *MemberInfo = D->getMemberSpecializationInfo(); Record.AddDeclRef(MemberInfo->getInstantiatedFrom()); Record.push_back(MemberInfo->getTemplateSpecializationKind()); Record.AddSourceLocation(MemberInfo->getPointOfInstantiation()); break; } case FunctionDecl::TK_FunctionTemplateSpecialization: { FunctionTemplateSpecializationInfo * FTSInfo = D->getTemplateSpecializationInfo(); RegisterTemplateSpecialization(FTSInfo->getTemplate(), D); Record.AddDeclRef(FTSInfo->getTemplate()); Record.push_back(FTSInfo->getTemplateSpecializationKind()); // Template arguments. Record.AddTemplateArgumentList(FTSInfo->TemplateArguments); // Template args as written. Record.push_back(FTSInfo->TemplateArgumentsAsWritten != nullptr); if (FTSInfo->TemplateArgumentsAsWritten) Record.AddASTTemplateArgumentListInfo( FTSInfo->TemplateArgumentsAsWritten); Record.AddSourceLocation(FTSInfo->getPointOfInstantiation()); if (MemberSpecializationInfo *MemberInfo = FTSInfo->getMemberSpecializationInfo()) { Record.push_back(1); Record.AddDeclRef(MemberInfo->getInstantiatedFrom()); Record.push_back(MemberInfo->getTemplateSpecializationKind()); Record.AddSourceLocation(MemberInfo->getPointOfInstantiation()); } else { Record.push_back(0); } if (D->isCanonicalDecl()) { // Write the template that contains the specializations set. We will // add a FunctionTemplateSpecializationInfo to it when reading. 
Record.AddDeclRef(FTSInfo->getTemplate()->getCanonicalDecl()); } break; } case FunctionDecl::TK_DependentFunctionTemplateSpecialization: { DependentFunctionTemplateSpecializationInfo * DFTSInfo = D->getDependentSpecializationInfo(); // Candidates. Record.push_back(DFTSInfo->getCandidates().size()); for (FunctionTemplateDecl *FTD : DFTSInfo->getCandidates()) Record.AddDeclRef(FTD); // Templates args. Record.push_back(DFTSInfo->TemplateArgumentsAsWritten != nullptr); if (DFTSInfo->TemplateArgumentsAsWritten) Record.AddASTTemplateArgumentListInfo( DFTSInfo->TemplateArgumentsAsWritten); break; } } VisitDeclaratorDecl(D); Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName()); Record.push_back(D->getIdentifierNamespace()); // The order matters here. It will be better to put the bit with higher // probability to be 0 in the end of the bits. See the comments in VisitDecl // for details. BitsPacker FunctionDeclBits; // FIXME: stable encoding FunctionDeclBits.addBits(llvm::to_underlying(D->getLinkageInternal()), 3); FunctionDeclBits.addBits((uint32_t)D->getStorageClass(), /*BitWidth=*/3); FunctionDeclBits.addBit(D->isInlineSpecified()); FunctionDeclBits.addBit(D->isInlined()); FunctionDeclBits.addBit(D->hasSkippedBody()); FunctionDeclBits.addBit(D->isVirtualAsWritten()); FunctionDeclBits.addBit(D->isPureVirtual()); FunctionDeclBits.addBit(D->hasInheritedPrototype()); FunctionDeclBits.addBit(D->hasWrittenPrototype()); FunctionDeclBits.addBit(D->isDeletedBit()); FunctionDeclBits.addBit(D->isTrivial()); FunctionDeclBits.addBit(D->isTrivialForCall()); FunctionDeclBits.addBit(D->isDefaulted()); FunctionDeclBits.addBit(D->isExplicitlyDefaulted()); FunctionDeclBits.addBit(D->isIneligibleOrNotSelected()); FunctionDeclBits.addBits((uint64_t)(D->getConstexprKind()), /*BitWidth=*/2); FunctionDeclBits.addBit(D->hasImplicitReturnZero()); FunctionDeclBits.addBit(D->isMultiVersion()); FunctionDeclBits.addBit(D->isLateTemplateParsed()); FunctionDeclBits.addBit(D->FriendConstraintRefersToEnclosingTemplate()); FunctionDeclBits.addBit(D->usesSEHTry()); Record.push_back(FunctionDeclBits); Record.AddSourceLocation(D->getEndLoc()); if (D->isExplicitlyDefaulted()) Record.AddSourceLocation(D->getDefaultLoc()); Record.push_back(D->getODRHash()); if (D->isDefaulted() || D->isDeletedAsWritten()) { if (auto *FDI = D->getDefalutedOrDeletedInfo()) { // Store both that there is an DefaultedOrDeletedInfo and whether it // contains a DeletedMessage. StringLiteral *DeletedMessage = FDI->getDeletedMessage(); Record.push_back(1 | (DeletedMessage ? 
2 : 0)); if (DeletedMessage) Record.AddStmt(DeletedMessage); Record.push_back(FDI->getUnqualifiedLookups().size()); for (DeclAccessPair P : FDI->getUnqualifiedLookups()) { Record.AddDeclRef(P.getDecl()); Record.push_back(P.getAccess()); } } else { Record.push_back(0); } } Record.push_back(D->param_size()); for (auto *P : D->parameters()) Record.AddDeclRef(P); Code = serialization::DECL_FUNCTION; } static void addExplicitSpecifier(ExplicitSpecifier ES, ASTRecordWriter &Record) { uint64_t Kind = static_cast(ES.getKind()); Kind = Kind << 1 | static_cast(ES.getExpr()); Record.push_back(Kind); if (ES.getExpr()) { Record.AddStmt(ES.getExpr()); } } void ASTDeclWriter::VisitCXXDeductionGuideDecl(CXXDeductionGuideDecl *D) { addExplicitSpecifier(D->getExplicitSpecifier(), Record); Record.AddDeclRef(D->Ctor); VisitFunctionDecl(D); Record.push_back(static_cast(D->getDeductionCandidateKind())); Code = serialization::DECL_CXX_DEDUCTION_GUIDE; } void ASTDeclWriter::VisitObjCMethodDecl(ObjCMethodDecl *D) { static_assert(DeclContext::NumObjCMethodDeclBits == 37, "You need to update the serializer after you change the " "ObjCMethodDeclBits"); VisitNamedDecl(D); // FIXME: convert to LazyStmtPtr? // Unlike C/C++, method bodies will never be in header files. bool HasBodyStuff = D->getBody() != nullptr; Record.push_back(HasBodyStuff); if (HasBodyStuff) { Record.AddStmt(D->getBody()); } Record.AddDeclRef(D->getSelfDecl()); Record.AddDeclRef(D->getCmdDecl()); Record.push_back(D->isInstanceMethod()); Record.push_back(D->isVariadic()); Record.push_back(D->isPropertyAccessor()); Record.push_back(D->isSynthesizedAccessorStub()); Record.push_back(D->isDefined()); Record.push_back(D->isOverriding()); Record.push_back(D->hasSkippedBody()); Record.push_back(D->isRedeclaration()); Record.push_back(D->hasRedeclaration()); if (D->hasRedeclaration()) { assert(Context.getObjCMethodRedeclaration(D)); Record.AddDeclRef(Context.getObjCMethodRedeclaration(D)); } // FIXME: stable encoding for @required/@optional Record.push_back(llvm::to_underlying(D->getImplementationControl())); // FIXME: stable encoding for in/out/inout/bycopy/byref/oneway/nullability Record.push_back(D->getObjCDeclQualifier()); Record.push_back(D->hasRelatedResultType()); Record.AddTypeRef(D->getReturnType()); Record.AddTypeSourceInfo(D->getReturnTypeSourceInfo()); Record.AddSourceLocation(D->getEndLoc()); Record.push_back(D->param_size()); for (const auto *P : D->parameters()) Record.AddDeclRef(P); Record.push_back(D->getSelLocsKind()); unsigned NumStoredSelLocs = D->getNumStoredSelLocs(); SourceLocation *SelLocs = D->getStoredSelLocs(); Record.push_back(NumStoredSelLocs); for (unsigned i = 0; i != NumStoredSelLocs; ++i) Record.AddSourceLocation(SelLocs[i]); Code = serialization::DECL_OBJC_METHOD; } void ASTDeclWriter::VisitObjCTypeParamDecl(ObjCTypeParamDecl *D) { VisitTypedefNameDecl(D); Record.push_back(D->Variance); Record.push_back(D->Index); Record.AddSourceLocation(D->VarianceLoc); Record.AddSourceLocation(D->ColonLoc); Code = serialization::DECL_OBJC_TYPE_PARAM; } void ASTDeclWriter::VisitObjCContainerDecl(ObjCContainerDecl *D) { static_assert(DeclContext::NumObjCContainerDeclBits == 64, "You need to update the serializer after you change the " "ObjCContainerDeclBits"); VisitNamedDecl(D); Record.AddSourceLocation(D->getAtStartLoc()); Record.AddSourceRange(D->getAtEndRange()); // Abstract class (no need to define a stable serialization::DECL code). 
} void ASTDeclWriter::VisitObjCInterfaceDecl(ObjCInterfaceDecl *D) { VisitRedeclarable(D); VisitObjCContainerDecl(D); Record.AddTypeRef(QualType(D->getTypeForDecl(), 0)); AddObjCTypeParamList(D->TypeParamList); Record.push_back(D->isThisDeclarationADefinition()); if (D->isThisDeclarationADefinition()) { // Write the DefinitionData ObjCInterfaceDecl::DefinitionData &Data = D->data(); Record.AddTypeSourceInfo(D->getSuperClassTInfo()); Record.AddSourceLocation(D->getEndOfDefinitionLoc()); Record.push_back(Data.HasDesignatedInitializers); Record.push_back(D->getODRHash()); // Write out the protocols that are directly referenced by the @interface. Record.push_back(Data.ReferencedProtocols.size()); for (const auto *P : D->protocols()) Record.AddDeclRef(P); for (const auto &PL : D->protocol_locs()) Record.AddSourceLocation(PL); // Write out the protocols that are transitively referenced. Record.push_back(Data.AllReferencedProtocols.size()); for (ObjCList::iterator P = Data.AllReferencedProtocols.begin(), PEnd = Data.AllReferencedProtocols.end(); P != PEnd; ++P) Record.AddDeclRef(*P); if (ObjCCategoryDecl *Cat = D->getCategoryListRaw()) { // Ensure that we write out the set of categories for this class. Writer.ObjCClassesWithCategories.insert(D); // Make sure that the categories get serialized. for (; Cat; Cat = Cat->getNextClassCategoryRaw()) (void)Writer.GetDeclRef(Cat); } } Code = serialization::DECL_OBJC_INTERFACE; } void ASTDeclWriter::VisitObjCIvarDecl(ObjCIvarDecl *D) { VisitFieldDecl(D); // FIXME: stable encoding for @public/@private/@protected/@package Record.push_back(D->getAccessControl()); Record.push_back(D->getSynthesize()); if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->isImplicit() && !D->isUsed(false) && !D->isInvalidDecl() && !D->isReferenced() && !D->isModulePrivate() && !D->getBitWidth() && !D->hasExtInfo() && D->getDeclName()) AbbrevToUse = Writer.getDeclObjCIvarAbbrev(); Code = serialization::DECL_OBJC_IVAR; } void ASTDeclWriter::VisitObjCProtocolDecl(ObjCProtocolDecl *D) { VisitRedeclarable(D); VisitObjCContainerDecl(D); Record.push_back(D->isThisDeclarationADefinition()); if (D->isThisDeclarationADefinition()) { Record.push_back(D->protocol_size()); for (const auto *I : D->protocols()) Record.AddDeclRef(I); for (const auto &PL : D->protocol_locs()) Record.AddSourceLocation(PL); Record.push_back(D->getODRHash()); } Code = serialization::DECL_OBJC_PROTOCOL; } void ASTDeclWriter::VisitObjCAtDefsFieldDecl(ObjCAtDefsFieldDecl *D) { VisitFieldDecl(D); Code = serialization::DECL_OBJC_AT_DEFS_FIELD; } void ASTDeclWriter::VisitObjCCategoryDecl(ObjCCategoryDecl *D) { VisitObjCContainerDecl(D); Record.AddSourceLocation(D->getCategoryNameLoc()); Record.AddSourceLocation(D->getIvarLBraceLoc()); Record.AddSourceLocation(D->getIvarRBraceLoc()); Record.AddDeclRef(D->getClassInterface()); AddObjCTypeParamList(D->TypeParamList); Record.push_back(D->protocol_size()); for (const auto *I : D->protocols()) Record.AddDeclRef(I); for (const auto &PL : D->protocol_locs()) Record.AddSourceLocation(PL); Code = serialization::DECL_OBJC_CATEGORY; } void ASTDeclWriter::VisitObjCCompatibleAliasDecl(ObjCCompatibleAliasDecl *D) { VisitNamedDecl(D); Record.AddDeclRef(D->getClassInterface()); Code = serialization::DECL_OBJC_COMPATIBLE_ALIAS; } void ASTDeclWriter::VisitObjCPropertyDecl(ObjCPropertyDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getAtLoc()); Record.AddSourceLocation(D->getLParenLoc()); Record.AddTypeRef(D->getType()); 
Record.AddTypeSourceInfo(D->getTypeSourceInfo()); // FIXME: stable encoding Record.push_back((unsigned)D->getPropertyAttributes()); Record.push_back((unsigned)D->getPropertyAttributesAsWritten()); // FIXME: stable encoding Record.push_back((unsigned)D->getPropertyImplementation()); Record.AddDeclarationName(D->getGetterName()); Record.AddSourceLocation(D->getGetterNameLoc()); Record.AddDeclarationName(D->getSetterName()); Record.AddSourceLocation(D->getSetterNameLoc()); Record.AddDeclRef(D->getGetterMethodDecl()); Record.AddDeclRef(D->getSetterMethodDecl()); Record.AddDeclRef(D->getPropertyIvarDecl()); Code = serialization::DECL_OBJC_PROPERTY; } void ASTDeclWriter::VisitObjCImplDecl(ObjCImplDecl *D) { VisitObjCContainerDecl(D); Record.AddDeclRef(D->getClassInterface()); // Abstract class (no need to define a stable serialization::DECL code). } void ASTDeclWriter::VisitObjCCategoryImplDecl(ObjCCategoryImplDecl *D) { VisitObjCImplDecl(D); Record.AddSourceLocation(D->getCategoryNameLoc()); Code = serialization::DECL_OBJC_CATEGORY_IMPL; } void ASTDeclWriter::VisitObjCImplementationDecl(ObjCImplementationDecl *D) { VisitObjCImplDecl(D); Record.AddDeclRef(D->getSuperClass()); Record.AddSourceLocation(D->getSuperClassLoc()); Record.AddSourceLocation(D->getIvarLBraceLoc()); Record.AddSourceLocation(D->getIvarRBraceLoc()); Record.push_back(D->hasNonZeroConstructors()); Record.push_back(D->hasDestructors()); Record.push_back(D->NumIvarInitializers); if (D->NumIvarInitializers) Record.AddCXXCtorInitializers( llvm::ArrayRef(D->init_begin(), D->init_end())); Code = serialization::DECL_OBJC_IMPLEMENTATION; } void ASTDeclWriter::VisitObjCPropertyImplDecl(ObjCPropertyImplDecl *D) { VisitDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Record.AddDeclRef(D->getPropertyDecl()); Record.AddDeclRef(D->getPropertyIvarDecl()); Record.AddSourceLocation(D->getPropertyIvarDeclLoc()); Record.AddDeclRef(D->getGetterMethodDecl()); Record.AddDeclRef(D->getSetterMethodDecl()); Record.AddStmt(D->getGetterCXXConstructor()); Record.AddStmt(D->getSetterCXXAssignment()); Code = serialization::DECL_OBJC_PROPERTY_IMPL; } void ASTDeclWriter::VisitFieldDecl(FieldDecl *D) { VisitDeclaratorDecl(D); Record.push_back(D->isMutable()); Record.push_back((D->StorageKind << 1) | D->BitField); if (D->StorageKind == FieldDecl::ISK_CapturedVLAType) Record.AddTypeRef(QualType(D->getCapturedVLAType(), 0)); else if (D->BitField) Record.AddStmt(D->getBitWidth()); if (!D->getDeclName()) Record.AddDeclRef(Context.getInstantiatedFromUnnamedFieldDecl(D)); if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->isImplicit() && !D->isUsed(false) && !D->isInvalidDecl() && !D->isReferenced() && !D->isTopLevelDeclInObjCContainer() && !D->isModulePrivate() && !D->getBitWidth() && !D->hasInClassInitializer() && !D->hasCapturedVLAType() && !D->hasExtInfo() && !ObjCIvarDecl::classofKind(D->getKind()) && !ObjCAtDefsFieldDecl::classofKind(D->getKind()) && D->getDeclName()) AbbrevToUse = Writer.getDeclFieldAbbrev(); Code = serialization::DECL_FIELD; } void ASTDeclWriter::VisitMSPropertyDecl(MSPropertyDecl *D) { VisitDeclaratorDecl(D); Record.AddIdentifierRef(D->getGetterId()); Record.AddIdentifierRef(D->getSetterId()); Code = serialization::DECL_MS_PROPERTY; } void ASTDeclWriter::VisitMSGuidDecl(MSGuidDecl *D) { VisitValueDecl(D); MSGuidDecl::Parts Parts = D->getParts(); Record.push_back(Parts.Part1); Record.push_back(Parts.Part2); Record.push_back(Parts.Part3); Record.append(std::begin(Parts.Part4And5), std::end(Parts.Part4And5)); 
Code = serialization::DECL_MS_GUID; } void ASTDeclWriter::VisitUnnamedGlobalConstantDecl( UnnamedGlobalConstantDecl *D) { VisitValueDecl(D); Record.AddAPValue(D->getValue()); Code = serialization::DECL_UNNAMED_GLOBAL_CONSTANT; } void ASTDeclWriter::VisitTemplateParamObjectDecl(TemplateParamObjectDecl *D) { VisitValueDecl(D); Record.AddAPValue(D->getValue()); Code = serialization::DECL_TEMPLATE_PARAM_OBJECT; } void ASTDeclWriter::VisitIndirectFieldDecl(IndirectFieldDecl *D) { VisitValueDecl(D); Record.push_back(D->getChainingSize()); for (const auto *P : D->chain()) Record.AddDeclRef(P); Code = serialization::DECL_INDIRECTFIELD; } void ASTDeclWriter::VisitVarDecl(VarDecl *D) { VisitRedeclarable(D); VisitDeclaratorDecl(D); // The order matters here. It will be better to put the bit with higher // probability to be 0 in the end of the bits. See the comments in VisitDecl // for details. BitsPacker VarDeclBits; VarDeclBits.addBits(llvm::to_underlying(D->getLinkageInternal()), /*BitWidth=*/3); bool ModulesCodegen = false; if (Writer.WritingModule && D->getStorageDuration() == SD_Static && !D->getDescribedVarTemplate()) { // When building a C++20 module interface unit or a partition unit, a // strong definition in the module interface is provided by the // compilation of that unit, not by its users. (Inline variables are still // emitted in module users.) ModulesCodegen = (Writer.WritingModule->isInterfaceOrPartition() || (D->hasAttr() && Writer.Context->getLangOpts().BuildingPCHWithObjectFile)) && Writer.Context->GetGVALinkageForVariable(D) >= GVA_StrongExternal; } VarDeclBits.addBit(ModulesCodegen); VarDeclBits.addBits(D->getStorageClass(), /*BitWidth=*/3); VarDeclBits.addBits(D->getTSCSpec(), /*BitWidth=*/2); VarDeclBits.addBits(D->getInitStyle(), /*BitWidth=*/2); VarDeclBits.addBit(D->isARCPseudoStrong()); bool HasDeducedType = false; if (!isa(D)) { VarDeclBits.addBit(D->isThisDeclarationADemotedDefinition()); VarDeclBits.addBit(D->isExceptionVariable()); VarDeclBits.addBit(D->isNRVOVariable()); VarDeclBits.addBit(D->isCXXForRangeDecl()); VarDeclBits.addBit(D->isInline()); VarDeclBits.addBit(D->isInlineSpecified()); VarDeclBits.addBit(D->isConstexpr()); VarDeclBits.addBit(D->isInitCapture()); VarDeclBits.addBit(D->isPreviousDeclInSameBlockScope()); VarDeclBits.addBit(D->isEscapingByref()); HasDeducedType = D->getType()->getContainedDeducedType(); VarDeclBits.addBit(HasDeducedType); if (const auto *IPD = dyn_cast(D)) VarDeclBits.addBits(llvm::to_underlying(IPD->getParameterKind()), /*Width=*/3); else VarDeclBits.addBits(0, /*Width=*/3); VarDeclBits.addBit(D->isObjCForDecl()); } Record.push_back(VarDeclBits); if (ModulesCodegen) Writer.AddDeclRef(D, Writer.ModularCodegenDecls); if (D->hasAttr()) { BlockVarCopyInit Init = Writer.Context->getBlockVarCopyInit(D); Record.AddStmt(Init.getCopyExpr()); if (Init.getCopyExpr()) Record.push_back(Init.canThrow()); } enum { VarNotTemplate = 0, VarTemplate, StaticDataMemberSpecialization }; if (VarTemplateDecl *TemplD = D->getDescribedVarTemplate()) { Record.push_back(VarTemplate); Record.AddDeclRef(TemplD); } else if (MemberSpecializationInfo *SpecInfo = D->getMemberSpecializationInfo()) { Record.push_back(StaticDataMemberSpecialization); Record.AddDeclRef(SpecInfo->getInstantiatedFrom()); Record.push_back(SpecInfo->getTemplateSpecializationKind()); Record.AddSourceLocation(SpecInfo->getPointOfInstantiation()); } else { Record.push_back(VarNotTemplate); } if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && 
!D->isTopLevelDeclInObjCContainer() && !needsAnonymousDeclarationNumber(D) && D->getDeclName().getNameKind() == DeclarationName::Identifier && !D->hasExtInfo() && D->getFirstDecl() == D->getMostRecentDecl() && D->getKind() == Decl::Var && !D->isInline() && !D->isConstexpr() && !D->isInitCapture() && !D->isPreviousDeclInSameBlockScope() && !D->isEscapingByref() && !HasDeducedType && D->getStorageDuration() != SD_Static && !D->getDescribedVarTemplate() && !D->getMemberSpecializationInfo() && !D->isObjCForDecl() && !isa(D) && !D->isEscapingByref()) AbbrevToUse = Writer.getDeclVarAbbrev(); Code = serialization::DECL_VAR; } void ASTDeclWriter::VisitImplicitParamDecl(ImplicitParamDecl *D) { VisitVarDecl(D); Code = serialization::DECL_IMPLICIT_PARAM; } void ASTDeclWriter::VisitParmVarDecl(ParmVarDecl *D) { VisitVarDecl(D); // See the implementation of `ParmVarDecl::getParameterIndex()`, which may // exceed the size of the normal bitfield. So it may be better to not pack // these bits. Record.push_back(D->getFunctionScopeIndex()); BitsPacker ParmVarDeclBits; ParmVarDeclBits.addBit(D->isObjCMethodParameter()); ParmVarDeclBits.addBits(D->getFunctionScopeDepth(), /*BitsWidth=*/7); // FIXME: stable encoding ParmVarDeclBits.addBits(D->getObjCDeclQualifier(), /*BitsWidth=*/7); ParmVarDeclBits.addBit(D->isKNRPromoted()); ParmVarDeclBits.addBit(D->hasInheritedDefaultArg()); ParmVarDeclBits.addBit(D->hasUninstantiatedDefaultArg()); ParmVarDeclBits.addBit(D->getExplicitObjectParamThisLoc().isValid()); Record.push_back(ParmVarDeclBits); if (D->hasUninstantiatedDefaultArg()) Record.AddStmt(D->getUninstantiatedDefaultArg()); if (D->getExplicitObjectParamThisLoc().isValid()) Record.AddSourceLocation(D->getExplicitObjectParamThisLoc()); Code = serialization::DECL_PARM_VAR; // If the assumptions about the DECL_PARM_VAR abbrev are true, use it. Here // we dynamically check for the properties that we optimize for, but don't // know are true of all PARM_VAR_DECLs. if (D->getDeclContext() == D->getLexicalDeclContext() && !D->hasAttrs() && !D->hasExtInfo() && D->getStorageClass() == 0 && !D->isInvalidDecl() && !D->isTopLevelDeclInObjCContainer() && D->getInitStyle() == VarDecl::CInit && // Can params have anything else? D->getInit() == nullptr) // No default expr. AbbrevToUse = Writer.getDeclParmVarAbbrev(); // Check things we know are true of *every* PARM_VAR_DECL, which is more than // just us assuming it. assert(!D->getTSCSpec() && "PARM_VAR_DECL can't use TLS"); assert(!D->isThisDeclarationADemotedDefinition() && "PARM_VAR_DECL can't be demoted definition."); assert(D->getAccess() == AS_none && "PARM_VAR_DECL can't be public/private"); assert(!D->isExceptionVariable() && "PARM_VAR_DECL can't be exception var"); assert(D->getPreviousDecl() == nullptr && "PARM_VAR_DECL can't be redecl"); assert(!D->isStaticDataMember() && "PARM_VAR_DECL can't be static data member"); } void ASTDeclWriter::VisitDecompositionDecl(DecompositionDecl *D) { // Record the number of bindings first to simplify deserialization. 
  Record.push_back(D->bindings().size());

  VisitVarDecl(D);

  for (auto *B : D->bindings())
    Record.AddDeclRef(B);

  Code = serialization::DECL_DECOMPOSITION;
}

void ASTDeclWriter::VisitBindingDecl(BindingDecl *D) {
  VisitValueDecl(D);
  Record.AddStmt(D->getBinding());
  Code = serialization::DECL_BINDING;
}

void ASTDeclWriter::VisitFileScopeAsmDecl(FileScopeAsmDecl *D) {
  VisitDecl(D);
  Record.AddStmt(D->getAsmString());
  Record.AddSourceLocation(D->getRParenLoc());
  Code = serialization::DECL_FILE_SCOPE_ASM;
}

void ASTDeclWriter::VisitTopLevelStmtDecl(TopLevelStmtDecl *D) {
  VisitDecl(D);
  Record.AddStmt(D->getStmt());
  Code = serialization::DECL_TOP_LEVEL_STMT_DECL;
}

void ASTDeclWriter::VisitEmptyDecl(EmptyDecl *D) {
  VisitDecl(D);
  Code = serialization::DECL_EMPTY;
}

void ASTDeclWriter::VisitLifetimeExtendedTemporaryDecl(
    LifetimeExtendedTemporaryDecl *D) {
  VisitDecl(D);
  Record.AddDeclRef(D->getExtendingDecl());
  Record.AddStmt(D->getTemporaryExpr());
  Record.push_back(static_cast<bool>(D->getValue()));
  if (D->getValue())
    Record.AddAPValue(*D->getValue());
  Record.push_back(D->getManglingNumber());
  Code = serialization::DECL_LIFETIME_EXTENDED_TEMPORARY;
}

void ASTDeclWriter::VisitBlockDecl(BlockDecl *D) {
  VisitDecl(D);
  Record.AddStmt(D->getBody());
  Record.AddTypeSourceInfo(D->getSignatureAsWritten());
  Record.push_back(D->param_size());
  for (ParmVarDecl *P : D->parameters())
    Record.AddDeclRef(P);
  Record.push_back(D->isVariadic());
  Record.push_back(D->blockMissingReturnType());
  Record.push_back(D->isConversionFromLambda());
  Record.push_back(D->doesNotEscape());
  Record.push_back(D->canAvoidCopyToHeap());
  Record.push_back(D->capturesCXXThis());
  Record.push_back(D->getNumCaptures());
  for (const auto &capture : D->captures()) {
    Record.AddDeclRef(capture.getVariable());

    unsigned flags = 0;
    if (capture.isByRef()) flags |= 1;
    if (capture.isNested()) flags |= 2;
    if (capture.hasCopyExpr()) flags |= 4;
    Record.push_back(flags);

    if (capture.hasCopyExpr()) Record.AddStmt(capture.getCopyExpr());
  }

  Code = serialization::DECL_BLOCK;
}

void ASTDeclWriter::VisitCapturedDecl(CapturedDecl *CD) {
  Record.push_back(CD->getNumParams());
  VisitDecl(CD);
  Record.push_back(CD->getContextParamPosition());
  Record.push_back(CD->isNothrow() ? 1 : 0);
  // Body is stored by VisitCapturedStmt.
for (unsigned I = 0; I < CD->getNumParams(); ++I) Record.AddDeclRef(CD->getParam(I)); Code = serialization::DECL_CAPTURED; } void ASTDeclWriter::VisitLinkageSpecDecl(LinkageSpecDecl *D) { static_assert(DeclContext::NumLinkageSpecDeclBits == 17, "You need to update the serializer after you change the" "LinkageSpecDeclBits"); VisitDecl(D); Record.push_back(llvm::to_underlying(D->getLanguage())); Record.AddSourceLocation(D->getExternLoc()); Record.AddSourceLocation(D->getRBraceLoc()); Code = serialization::DECL_LINKAGE_SPEC; } void ASTDeclWriter::VisitExportDecl(ExportDecl *D) { VisitDecl(D); Record.AddSourceLocation(D->getRBraceLoc()); Code = serialization::DECL_EXPORT; } void ASTDeclWriter::VisitLabelDecl(LabelDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Code = serialization::DECL_LABEL; } void ASTDeclWriter::VisitNamespaceDecl(NamespaceDecl *D) { VisitRedeclarable(D); VisitNamedDecl(D); BitsPacker NamespaceDeclBits; NamespaceDeclBits.addBit(D->isInline()); NamespaceDeclBits.addBit(D->isNested()); Record.push_back(NamespaceDeclBits); Record.AddSourceLocation(D->getBeginLoc()); Record.AddSourceLocation(D->getRBraceLoc()); if (D->isFirstDecl()) Record.AddDeclRef(D->getAnonymousNamespace()); Code = serialization::DECL_NAMESPACE; if (Writer.hasChain() && D->isAnonymousNamespace() && D == D->getMostRecentDecl()) { // This is a most recent reopening of the anonymous namespace. If its parent // is in a previous PCH (or is the TU), mark that parent for update, because // the original namespace always points to the latest re-opening of its // anonymous namespace. Decl *Parent = cast( D->getParent()->getRedeclContext()->getPrimaryContext()); if (Parent->isFromASTFile() || isa(Parent)) { Writer.DeclUpdates[Parent].push_back( ASTWriter::DeclUpdate(UPD_CXX_ADDED_ANONYMOUS_NAMESPACE, D)); } } } void ASTDeclWriter::VisitNamespaceAliasDecl(NamespaceAliasDecl *D) { VisitRedeclarable(D); VisitNamedDecl(D); Record.AddSourceLocation(D->getNamespaceLoc()); Record.AddSourceLocation(D->getTargetNameLoc()); Record.AddNestedNameSpecifierLoc(D->getQualifierLoc()); Record.AddDeclRef(D->getNamespace()); Code = serialization::DECL_NAMESPACE_ALIAS; } void ASTDeclWriter::VisitUsingDecl(UsingDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getUsingLoc()); Record.AddNestedNameSpecifierLoc(D->getQualifierLoc()); Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName()); Record.AddDeclRef(D->FirstUsingShadow.getPointer()); Record.push_back(D->hasTypename()); Record.AddDeclRef(Context.getInstantiatedFromUsingDecl(D)); Code = serialization::DECL_USING; } void ASTDeclWriter::VisitUsingEnumDecl(UsingEnumDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getUsingLoc()); Record.AddSourceLocation(D->getEnumLoc()); Record.AddTypeSourceInfo(D->getEnumType()); Record.AddDeclRef(D->FirstUsingShadow.getPointer()); Record.AddDeclRef(Context.getInstantiatedFromUsingEnumDecl(D)); Code = serialization::DECL_USING_ENUM; } void ASTDeclWriter::VisitUsingPackDecl(UsingPackDecl *D) { Record.push_back(D->NumExpansions); VisitNamedDecl(D); Record.AddDeclRef(D->getInstantiatedFromUsingDecl()); for (auto *E : D->expansions()) Record.AddDeclRef(E); Code = serialization::DECL_USING_PACK; } void ASTDeclWriter::VisitUsingShadowDecl(UsingShadowDecl *D) { VisitRedeclarable(D); VisitNamedDecl(D); Record.AddDeclRef(D->getTargetDecl()); Record.push_back(D->getIdentifierNamespace()); Record.AddDeclRef(D->UsingOrNextShadow); Record.AddDeclRef(Context.getInstantiatedFromUsingShadowDecl(D)); if (D->getDeclContext() == 
D->getLexicalDeclContext() && D->getFirstDecl() == D->getMostRecentDecl() && !D->hasAttrs() && !needsAnonymousDeclarationNumber(D) && D->getDeclName().getNameKind() == DeclarationName::Identifier) AbbrevToUse = Writer.getDeclUsingShadowAbbrev(); Code = serialization::DECL_USING_SHADOW; } void ASTDeclWriter::VisitConstructorUsingShadowDecl( ConstructorUsingShadowDecl *D) { VisitUsingShadowDecl(D); Record.AddDeclRef(D->NominatedBaseClassShadowDecl); Record.AddDeclRef(D->ConstructedBaseClassShadowDecl); Record.push_back(D->IsVirtual); Code = serialization::DECL_CONSTRUCTOR_USING_SHADOW; } void ASTDeclWriter::VisitUsingDirectiveDecl(UsingDirectiveDecl *D) { VisitNamedDecl(D); Record.AddSourceLocation(D->getUsingLoc()); Record.AddSourceLocation(D->getNamespaceKeyLocation()); Record.AddNestedNameSpecifierLoc(D->getQualifierLoc()); Record.AddDeclRef(D->getNominatedNamespace()); Record.AddDeclRef(dyn_cast(D->getCommonAncestor())); Code = serialization::DECL_USING_DIRECTIVE; } void ASTDeclWriter::VisitUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *D) { VisitValueDecl(D); Record.AddSourceLocation(D->getUsingLoc()); Record.AddNestedNameSpecifierLoc(D->getQualifierLoc()); Record.AddDeclarationNameLoc(D->DNLoc, D->getDeclName()); Record.AddSourceLocation(D->getEllipsisLoc()); Code = serialization::DECL_UNRESOLVED_USING_VALUE; } void ASTDeclWriter::VisitUnresolvedUsingTypenameDecl( UnresolvedUsingTypenameDecl *D) { VisitTypeDecl(D); Record.AddSourceLocation(D->getTypenameLoc()); Record.AddNestedNameSpecifierLoc(D->getQualifierLoc()); Record.AddSourceLocation(D->getEllipsisLoc()); Code = serialization::DECL_UNRESOLVED_USING_TYPENAME; } void ASTDeclWriter::VisitUnresolvedUsingIfExistsDecl( UnresolvedUsingIfExistsDecl *D) { VisitNamedDecl(D); Code = serialization::DECL_UNRESOLVED_USING_IF_EXISTS; } void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) { VisitRecordDecl(D); enum { CXXRecNotTemplate = 0, CXXRecTemplate, CXXRecMemberSpecialization, CXXLambda }; if (ClassTemplateDecl *TemplD = D->getDescribedClassTemplate()) { Record.push_back(CXXRecTemplate); Record.AddDeclRef(TemplD); } else if (MemberSpecializationInfo *MSInfo = D->getMemberSpecializationInfo()) { Record.push_back(CXXRecMemberSpecialization); Record.AddDeclRef(MSInfo->getInstantiatedFrom()); Record.push_back(MSInfo->getTemplateSpecializationKind()); Record.AddSourceLocation(MSInfo->getPointOfInstantiation()); } else if (D->isLambda()) { // For a lambda, we need some information early for merging. Record.push_back(CXXLambda); if (auto *Context = D->getLambdaContextDecl()) { Record.AddDeclRef(Context); Record.push_back(D->getLambdaIndexInContext()); } else { Record.push_back(0); } } else { Record.push_back(CXXRecNotTemplate); } Record.push_back(D->isThisDeclarationADefinition()); if (D->isThisDeclarationADefinition()) Record.AddCXXDefinitionData(D); if (D->isCompleteDefinition() && D->isInNamedModule()) Writer.AddDeclRef(D, Writer.ModularCodegenDecls); // Store (what we currently believe to be) the key function to avoid // deserializing every method so we can compute it. // // FIXME: Avoid adding the key function if the class is defined in // module purview since in that case the key function is meaningless. 
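// VisitCXXRecordDecl above encodes a small discriminator first (CXXRecNotTemplate,
// CXXRecTemplate, CXXRecMemberSpecialization or CXXLambda) and then only the fields
// that discriminator implies, so the reader can switch on the tag and consume exactly
// the matching payload. A minimal illustrative sketch of that tag-then-payload shape,
// assuming a record is just a flat vector of integers (Tag, writeTagged and readTagged
// are placeholder names, not Clang API):
#include <cstddef>
#include <cstdint>
#include <vector>

enum Tag : uint64_t { NotTemplate = 0, Template, MemberSpec, Lambda };

// Writer: emit the tag, then only the fields that this tag implies.
void writeTagged(std::vector<uint64_t> &Record, Tag T, uint64_t Payload) {
  Record.push_back(T);
  if (T != NotTemplate)
    Record.push_back(Payload); // e.g. a declaration reference or an index
}

// Reader: must consume exactly what the writer produced for this tag,
// otherwise every later field in the record is misinterpreted.
uint64_t readTagged(const std::vector<uint64_t> &Record, std::size_t &Idx) {
  Tag T = static_cast<Tag>(Record[Idx++]);
  if (T == NotTemplate)
    return 0;
  return Record[Idx++];
}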
if (D->isCompleteDefinition()) Record.AddDeclRef(Context.getCurrentKeyFunction(D)); Code = serialization::DECL_CXX_RECORD; } void ASTDeclWriter::VisitCXXMethodDecl(CXXMethodDecl *D) { VisitFunctionDecl(D); if (D->isCanonicalDecl()) { Record.push_back(D->size_overridden_methods()); for (const CXXMethodDecl *MD : D->overridden_methods()) Record.AddDeclRef(MD); } else { // We only need to record overridden methods once for the canonical decl. Record.push_back(0); } if (D->getDeclContext() == D->getLexicalDeclContext() && D->getFirstDecl() == D->getMostRecentDecl() && !D->isInvalidDecl() && !D->hasAttrs() && !D->isTopLevelDeclInObjCContainer() && D->getDeclName().getNameKind() == DeclarationName::Identifier && !D->hasExtInfo() && !D->isExplicitlyDefaulted()) { if (D->getTemplatedKind() == FunctionDecl::TK_NonTemplate || D->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate || D->getTemplatedKind() == FunctionDecl::TK_MemberSpecialization || D->getTemplatedKind() == FunctionDecl::TK_DependentNonTemplate) AbbrevToUse = Writer.getDeclCXXMethodAbbrev(D->getTemplatedKind()); else if (D->getTemplatedKind() == FunctionDecl::TK_FunctionTemplateSpecialization) { FunctionTemplateSpecializationInfo *FTSInfo = D->getTemplateSpecializationInfo(); if (FTSInfo->TemplateArguments->size() == 1) { const TemplateArgument &TA = FTSInfo->TemplateArguments->get(0); if (TA.getKind() == TemplateArgument::Type && !FTSInfo->TemplateArgumentsAsWritten && !FTSInfo->getMemberSpecializationInfo()) AbbrevToUse = Writer.getDeclCXXMethodAbbrev(D->getTemplatedKind()); } } else if (D->getTemplatedKind() == FunctionDecl::TK_DependentFunctionTemplateSpecialization) { DependentFunctionTemplateSpecializationInfo *DFTSInfo = D->getDependentSpecializationInfo(); if (!DFTSInfo->TemplateArgumentsAsWritten) AbbrevToUse = Writer.getDeclCXXMethodAbbrev(D->getTemplatedKind()); } } Code = serialization::DECL_CXX_METHOD; } void ASTDeclWriter::VisitCXXConstructorDecl(CXXConstructorDecl *D) { static_assert(DeclContext::NumCXXConstructorDeclBits == 64, "You need to update the serializer after you change the " "CXXConstructorDeclBits"); Record.push_back(D->getTrailingAllocKind()); addExplicitSpecifier(D->getExplicitSpecifier(), Record); if (auto Inherited = D->getInheritedConstructor()) { Record.AddDeclRef(Inherited.getShadowDecl()); Record.AddDeclRef(Inherited.getConstructor()); } VisitCXXMethodDecl(D); Code = serialization::DECL_CXX_CONSTRUCTOR; } void ASTDeclWriter::VisitCXXDestructorDecl(CXXDestructorDecl *D) { VisitCXXMethodDecl(D); Record.AddDeclRef(D->getOperatorDelete()); if (D->getOperatorDelete()) Record.AddStmt(D->getOperatorDeleteThisArg()); Code = serialization::DECL_CXX_DESTRUCTOR; } void ASTDeclWriter::VisitCXXConversionDecl(CXXConversionDecl *D) { addExplicitSpecifier(D->getExplicitSpecifier(), Record); VisitCXXMethodDecl(D); Code = serialization::DECL_CXX_CONVERSION; } void ASTDeclWriter::VisitImportDecl(ImportDecl *D) { VisitDecl(D); Record.push_back(Writer.getSubmoduleID(D->getImportedModule())); ArrayRef IdentifierLocs = D->getIdentifierLocs(); Record.push_back(!IdentifierLocs.empty()); if (IdentifierLocs.empty()) { Record.AddSourceLocation(D->getEndLoc()); Record.push_back(1); } else { for (unsigned I = 0, N = IdentifierLocs.size(); I != N; ++I) Record.AddSourceLocation(IdentifierLocs[I]); Record.push_back(IdentifierLocs.size()); } // Note: the number of source locations must always be the last element in // the record. 
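// The note above (count written as the last element of DECL_IMPORT) and the visitors
// that push a count before VisitDecl (e.g. VisitFriendDecl, VisitUsingPackDecl) are two
// flavours of the same length-prefixing idea: a leading count lets the deserializer
// allocate trailing storage before reading, while a trailing count lets it size the
// tail by looking at the end of the record. A small sketch under the assumption that a
// record is a flat vector of integers (these helper names are illustrative, not Clang API):
#include <cstddef>
#include <cstdint>
#include <vector>

using FlatRecord = std::vector<uint64_t>;

// Leading count: the reader can allocate before consuming the elements.
void writeWithLeadingCount(FlatRecord &R, const std::vector<uint64_t> &Elems) {
  R.push_back(Elems.size());
  R.insert(R.end(), Elems.begin(), Elems.end());
}

std::vector<uint64_t> readWithLeadingCount(const FlatRecord &R, std::size_t &Idx) {
  std::vector<uint64_t> Elems(R[Idx++]); // sized up front
  for (uint64_t &E : Elems)
    E = R[Idx++];
  return Elems;
}

// Trailing count: the reader sizes the tail from the very last element,
// which is the shape the DECL_IMPORT note above asks for.
void writeWithTrailingCount(FlatRecord &R, const std::vector<uint64_t> &Elems) {
  R.insert(R.end(), Elems.begin(), Elems.end());
  R.push_back(Elems.size());
}

std::vector<uint64_t> readWithTrailingCount(const FlatRecord &R) {
  auto N = static_cast<std::ptrdiff_t>(R.back());
  return std::vector<uint64_t>(R.end() - 1 - N, R.end() - 1);
}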
Code = serialization::DECL_IMPORT; } void ASTDeclWriter::VisitAccessSpecDecl(AccessSpecDecl *D) { VisitDecl(D); Record.AddSourceLocation(D->getColonLoc()); Code = serialization::DECL_ACCESS_SPEC; } void ASTDeclWriter::VisitFriendDecl(FriendDecl *D) { // Record the number of friend type template parameter lists here // so as to simplify memory allocation during deserialization. Record.push_back(D->NumTPLists); VisitDecl(D); bool hasFriendDecl = D->Friend.is(); Record.push_back(hasFriendDecl); if (hasFriendDecl) Record.AddDeclRef(D->getFriendDecl()); else Record.AddTypeSourceInfo(D->getFriendType()); for (unsigned i = 0; i < D->NumTPLists; ++i) Record.AddTemplateParameterList(D->getFriendTypeTemplateParameterList(i)); Record.AddDeclRef(D->getNextFriend()); Record.push_back(D->UnsupportedFriend); Record.AddSourceLocation(D->FriendLoc); Code = serialization::DECL_FRIEND; } void ASTDeclWriter::VisitFriendTemplateDecl(FriendTemplateDecl *D) { VisitDecl(D); Record.push_back(D->getNumTemplateParameters()); for (unsigned i = 0, e = D->getNumTemplateParameters(); i != e; ++i) Record.AddTemplateParameterList(D->getTemplateParameterList(i)); Record.push_back(D->getFriendDecl() != nullptr); if (D->getFriendDecl()) Record.AddDeclRef(D->getFriendDecl()); else Record.AddTypeSourceInfo(D->getFriendType()); Record.AddSourceLocation(D->getFriendLoc()); Code = serialization::DECL_FRIEND_TEMPLATE; } void ASTDeclWriter::VisitTemplateDecl(TemplateDecl *D) { VisitNamedDecl(D); Record.AddTemplateParameterList(D->getTemplateParameters()); Record.AddDeclRef(D->getTemplatedDecl()); } void ASTDeclWriter::VisitConceptDecl(ConceptDecl *D) { VisitTemplateDecl(D); Record.AddStmt(D->getConstraintExpr()); Code = serialization::DECL_CONCEPT; } void ASTDeclWriter::VisitImplicitConceptSpecializationDecl( ImplicitConceptSpecializationDecl *D) { Record.push_back(D->getTemplateArguments().size()); VisitDecl(D); for (const TemplateArgument &Arg : D->getTemplateArguments()) Record.AddTemplateArgument(Arg); Code = serialization::DECL_IMPLICIT_CONCEPT_SPECIALIZATION; } void ASTDeclWriter::VisitRequiresExprBodyDecl(RequiresExprBodyDecl *D) { Code = serialization::DECL_REQUIRES_EXPR_BODY; } void ASTDeclWriter::VisitRedeclarableTemplateDecl(RedeclarableTemplateDecl *D) { VisitRedeclarable(D); // Emit data to initialize CommonOrPrev before VisitTemplateDecl so that // getCommonPtr() can be used while this is still initializing. if (D->isFirstDecl()) { // This declaration owns the 'common' pointer, so serialize that data now. Record.AddDeclRef(D->getInstantiatedFromMemberTemplate()); if (D->getInstantiatedFromMemberTemplate()) Record.push_back(D->isMemberSpecialization()); } VisitTemplateDecl(D); Record.push_back(D->getIdentifierNamespace()); } void ASTDeclWriter::VisitClassTemplateDecl(ClassTemplateDecl *D) { VisitRedeclarableTemplateDecl(D); if (D->isFirstDecl()) AddTemplateSpecializations(D); // Force emitting the corresponding deduction guide in reduced BMI mode. // Otherwise, the deduction guide may be optimized out incorrectly. 
if (Writer.isGeneratingReducedBMI()) { auto Name = Context.DeclarationNames.getCXXDeductionGuideName(D); for (auto *DG : D->getDeclContext()->noload_lookup(Name)) Writer.GetDeclRef(DG->getCanonicalDecl()); } Code = serialization::DECL_CLASS_TEMPLATE; } void ASTDeclWriter::VisitClassTemplateSpecializationDecl( ClassTemplateSpecializationDecl *D) { RegisterTemplateSpecialization(D->getSpecializedTemplate(), D); VisitCXXRecordDecl(D); llvm::PointerUnion InstFrom = D->getSpecializedTemplateOrPartial(); if (Decl *InstFromD = InstFrom.dyn_cast()) { Record.AddDeclRef(InstFromD); } else { Record.AddDeclRef(InstFrom.get()); Record.AddTemplateArgumentList(&D->getTemplateInstantiationArgs()); } Record.AddTemplateArgumentList(&D->getTemplateArgs()); Record.AddSourceLocation(D->getPointOfInstantiation()); Record.push_back(D->getSpecializationKind()); Record.push_back(D->isCanonicalDecl()); if (D->isCanonicalDecl()) { // When reading, we'll add it to the folding set of the following template. Record.AddDeclRef(D->getSpecializedTemplate()->getCanonicalDecl()); } bool ExplicitInstantiation = D->getTemplateSpecializationKind() == TSK_ExplicitInstantiationDeclaration || D->getTemplateSpecializationKind() == TSK_ExplicitInstantiationDefinition; Record.push_back(ExplicitInstantiation); if (ExplicitInstantiation) { Record.AddSourceLocation(D->getExternKeywordLoc()); Record.AddSourceLocation(D->getTemplateKeywordLoc()); } const ASTTemplateArgumentListInfo *ArgsWritten = D->getTemplateArgsAsWritten(); Record.push_back(!!ArgsWritten); if (ArgsWritten) Record.AddASTTemplateArgumentListInfo(ArgsWritten); Code = serialization::DECL_CLASS_TEMPLATE_SPECIALIZATION; } void ASTDeclWriter::VisitClassTemplatePartialSpecializationDecl( ClassTemplatePartialSpecializationDecl *D) { Record.AddTemplateParameterList(D->getTemplateParameters()); VisitClassTemplateSpecializationDecl(D); // These are read/set from/to the first declaration. 
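// "These are read/set from/to the first declaration": data shared by a whole
// redeclaration chain (here the instantiated-from-member information) lives on the
// first declaration only, which is why the writer guards it with
// getPreviousDecl() == nullptr below. A minimal sketch of that ownership model with
// placeholder types (Node and CommonData are illustrative, not Clang classes):
#include <cstdint>
#include <memory>
#include <vector>

struct CommonData {
  uint64_t InstantiatedFromMember = 0; // stands in for a declaration reference
  bool IsMemberSpecialization = false;
};

struct Node {
  Node *Previous = nullptr;           // next-older declaration, if any
  std::unique_ptr<CommonData> Common; // only ever allocated on the first declaration

  Node *getFirstDecl() {
    Node *N = this;
    while (N->Previous)
      N = N->Previous;
    return N;
  }
  CommonData &getCommon() { return *getFirstDecl()->Common; }
};

// Writer: emit the chain-wide data exactly once, from the first declaration, so later
// redeclarations never duplicate (or contradict) it.
void writeNode(std::vector<uint64_t> &Record, Node &N) {
  if (N.Previous == nullptr) {
    Record.push_back(N.getCommon().InstantiatedFromMember);
    Record.push_back(N.getCommon().IsMemberSpecialization);
  }
  // ... per-declaration fields would follow here ...
}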
if (D->getPreviousDecl() == nullptr) { Record.AddDeclRef(D->getInstantiatedFromMember()); Record.push_back(D->isMemberSpecialization()); } Code = serialization::DECL_CLASS_TEMPLATE_PARTIAL_SPECIALIZATION; } void ASTDeclWriter::VisitVarTemplateDecl(VarTemplateDecl *D) { VisitRedeclarableTemplateDecl(D); if (D->isFirstDecl()) AddTemplateSpecializations(D); Code = serialization::DECL_VAR_TEMPLATE; } void ASTDeclWriter::VisitVarTemplateSpecializationDecl( VarTemplateSpecializationDecl *D) { RegisterTemplateSpecialization(D->getSpecializedTemplate(), D); llvm::PointerUnion InstFrom = D->getSpecializedTemplateOrPartial(); if (Decl *InstFromD = InstFrom.dyn_cast()) { Record.AddDeclRef(InstFromD); } else { Record.AddDeclRef(InstFrom.get()); Record.AddTemplateArgumentList(&D->getTemplateInstantiationArgs()); } bool ExplicitInstantiation = D->getTemplateSpecializationKind() == TSK_ExplicitInstantiationDeclaration || D->getTemplateSpecializationKind() == TSK_ExplicitInstantiationDefinition; Record.push_back(ExplicitInstantiation); if (ExplicitInstantiation) { Record.AddSourceLocation(D->getExternKeywordLoc()); Record.AddSourceLocation(D->getTemplateKeywordLoc()); } const ASTTemplateArgumentListInfo *ArgsWritten = D->getTemplateArgsAsWritten(); Record.push_back(!!ArgsWritten); if (ArgsWritten) Record.AddASTTemplateArgumentListInfo(ArgsWritten); Record.AddTemplateArgumentList(&D->getTemplateArgs()); Record.AddSourceLocation(D->getPointOfInstantiation()); Record.push_back(D->getSpecializationKind()); Record.push_back(D->IsCompleteDefinition); VisitVarDecl(D); Record.push_back(D->isCanonicalDecl()); if (D->isCanonicalDecl()) { // When reading, we'll add it to the folding set of the following template. Record.AddDeclRef(D->getSpecializedTemplate()->getCanonicalDecl()); } Code = serialization::DECL_VAR_TEMPLATE_SPECIALIZATION; } void ASTDeclWriter::VisitVarTemplatePartialSpecializationDecl( VarTemplatePartialSpecializationDecl *D) { Record.AddTemplateParameterList(D->getTemplateParameters()); VisitVarTemplateSpecializationDecl(D); // These are read/set from/to the first declaration. 
if (D->getPreviousDecl() == nullptr) { Record.AddDeclRef(D->getInstantiatedFromMember()); Record.push_back(D->isMemberSpecialization()); } Code = serialization::DECL_VAR_TEMPLATE_PARTIAL_SPECIALIZATION; } void ASTDeclWriter::VisitFunctionTemplateDecl(FunctionTemplateDecl *D) { VisitRedeclarableTemplateDecl(D); if (D->isFirstDecl()) AddTemplateSpecializations(D); Code = serialization::DECL_FUNCTION_TEMPLATE; } void ASTDeclWriter::VisitTemplateTypeParmDecl(TemplateTypeParmDecl *D) { Record.push_back(D->hasTypeConstraint()); VisitTypeDecl(D); Record.push_back(D->wasDeclaredWithTypename()); const TypeConstraint *TC = D->getTypeConstraint(); - assert((bool)TC == D->hasTypeConstraint()); + Record.push_back(/*TypeConstraintInitialized=*/TC != nullptr); if (TC) { auto *CR = TC->getConceptReference(); Record.push_back(CR != nullptr); if (CR) Record.AddConceptReference(CR); Record.AddStmt(TC->getImmediatelyDeclaredConstraint()); Record.push_back(D->isExpandedParameterPack()); if (D->isExpandedParameterPack()) Record.push_back(D->getNumExpansionParameters()); } bool OwnsDefaultArg = D->hasDefaultArgument() && !D->defaultArgumentWasInherited(); Record.push_back(OwnsDefaultArg); if (OwnsDefaultArg) Record.AddTemplateArgumentLoc(D->getDefaultArgument()); - if (!TC && !OwnsDefaultArg && + if (!D->hasTypeConstraint() && !OwnsDefaultArg && D->getDeclContext() == D->getLexicalDeclContext() && !D->isInvalidDecl() && !D->hasAttrs() && !D->isTopLevelDeclInObjCContainer() && !D->isImplicit() && D->getDeclName().getNameKind() == DeclarationName::Identifier) AbbrevToUse = Writer.getDeclTemplateTypeParmAbbrev(); Code = serialization::DECL_TEMPLATE_TYPE_PARM; } void ASTDeclWriter::VisitNonTypeTemplateParmDecl(NonTypeTemplateParmDecl *D) { // For an expanded parameter pack, record the number of expansion types here // so that it's easier for deserialization to allocate the right amount of // memory. Expr *TypeConstraint = D->getPlaceholderTypeConstraint(); Record.push_back(!!TypeConstraint); if (D->isExpandedParameterPack()) Record.push_back(D->getNumExpansionTypes()); VisitDeclaratorDecl(D); // TemplateParmPosition. Record.push_back(D->getDepth()); Record.push_back(D->getPosition()); if (TypeConstraint) Record.AddStmt(TypeConstraint); if (D->isExpandedParameterPack()) { for (unsigned I = 0, N = D->getNumExpansionTypes(); I != N; ++I) { Record.AddTypeRef(D->getExpansionType(I)); Record.AddTypeSourceInfo(D->getExpansionTypeSourceInfo(I)); } Code = serialization::DECL_EXPANDED_NON_TYPE_TEMPLATE_PARM_PACK; } else { // Rest of NonTypeTemplateParmDecl. Record.push_back(D->isParameterPack()); bool OwnsDefaultArg = D->hasDefaultArgument() && !D->defaultArgumentWasInherited(); Record.push_back(OwnsDefaultArg); if (OwnsDefaultArg) Record.AddTemplateArgumentLoc(D->getDefaultArgument()); Code = serialization::DECL_NON_TYPE_TEMPLATE_PARM; } } void ASTDeclWriter::VisitTemplateTemplateParmDecl(TemplateTemplateParmDecl *D) { // For an expanded parameter pack, record the number of expansion types here // so that it's easier for deserialization to allocate the right amount of // memory. if (D->isExpandedParameterPack()) Record.push_back(D->getNumExpansionTemplateParameters()); VisitTemplateDecl(D); Record.push_back(D->wasDeclaredWithTypename()); // TemplateParmPosition. 
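// The TemplateTypeParmDecl change above swaps an assertion for an explicit
// TypeConstraintInitialized bit, the same presence-flag pattern already used for
// OwnsDefaultArg and ArgsWritten: write one boolean, then write the payload only when
// the boolean is true, so reader and writer always agree on whether the optional data
// follows. A hedged sketch of that pairing over a flat integer record (writeOptional
// and readOptional are illustrative names, not Clang API):
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

// Writer: one presence bit, then the payload only if it exists.
void writeOptional(std::vector<uint64_t> &Record,
                   const std::optional<uint64_t> &Value) {
  Record.push_back(Value.has_value());
  if (Value)
    Record.push_back(*Value);
}

// Reader: tests the same bit before consuming the payload; if the two sides ever
// disagree, every later field in the record shifts and is misread.
std::optional<uint64_t> readOptional(const std::vector<uint64_t> &Record,
                                     std::size_t &Idx) {
  if (!Record[Idx++])
    return std::nullopt;
  return Record[Idx++];
}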
Record.push_back(D->getDepth()); Record.push_back(D->getPosition()); if (D->isExpandedParameterPack()) { for (unsigned I = 0, N = D->getNumExpansionTemplateParameters(); I != N; ++I) Record.AddTemplateParameterList(D->getExpansionTemplateParameters(I)); Code = serialization::DECL_EXPANDED_TEMPLATE_TEMPLATE_PARM_PACK; } else { // Rest of TemplateTemplateParmDecl. Record.push_back(D->isParameterPack()); bool OwnsDefaultArg = D->hasDefaultArgument() && !D->defaultArgumentWasInherited(); Record.push_back(OwnsDefaultArg); if (OwnsDefaultArg) Record.AddTemplateArgumentLoc(D->getDefaultArgument()); Code = serialization::DECL_TEMPLATE_TEMPLATE_PARM; } } void ASTDeclWriter::VisitTypeAliasTemplateDecl(TypeAliasTemplateDecl *D) { VisitRedeclarableTemplateDecl(D); Code = serialization::DECL_TYPE_ALIAS_TEMPLATE; } void ASTDeclWriter::VisitStaticAssertDecl(StaticAssertDecl *D) { VisitDecl(D); Record.AddStmt(D->getAssertExpr()); Record.push_back(D->isFailed()); Record.AddStmt(D->getMessage()); Record.AddSourceLocation(D->getRParenLoc()); Code = serialization::DECL_STATIC_ASSERT; } /// Emit the DeclContext part of a declaration context decl. void ASTDeclWriter::VisitDeclContext(DeclContext *DC) { static_assert(DeclContext::NumDeclContextBits == 13, "You need to update the serializer after you change the " "DeclContextBits"); uint64_t LexicalOffset = 0; uint64_t VisibleOffset = 0; if (Writer.isGeneratingReducedBMI() && isa(DC) && cast(DC)->isFromExplicitGlobalModule()) { // In reduced BMI, delay writing lexical and visible block for namespace // in the global module fragment. See the comments of DelayedNamespace for // details. Writer.DelayedNamespace.push_back(cast(DC)); } else { LexicalOffset = Writer.WriteDeclContextLexicalBlock(Context, DC); VisibleOffset = Writer.WriteDeclContextVisibleBlock(Context, DC); } Record.AddOffset(LexicalOffset); Record.AddOffset(VisibleOffset); } const Decl *ASTWriter::getFirstLocalDecl(const Decl *D) { assert(IsLocalDecl(D) && "expected a local declaration"); const Decl *Canon = D->getCanonicalDecl(); if (IsLocalDecl(Canon)) return Canon; const Decl *&CacheEntry = FirstLocalDeclCache[Canon]; if (CacheEntry) return CacheEntry; for (const Decl *Redecl = D; Redecl; Redecl = Redecl->getPreviousDecl()) if (IsLocalDecl(Redecl)) D = Redecl; return CacheEntry = D; } template void ASTDeclWriter::VisitRedeclarable(Redeclarable *D) { T *First = D->getFirstDecl(); T *MostRecent = First->getMostRecentDecl(); T *DAsT = static_cast(D); if (MostRecent != First) { assert(isRedeclarableDeclKind(DAsT->getKind()) && "Not considered redeclarable?"); Record.AddDeclRef(First); // Write out a list of local redeclarations of this declaration if it's the // first local declaration in the chain. const Decl *FirstLocal = Writer.getFirstLocalDecl(DAsT); if (DAsT == FirstLocal) { // Emit a list of all imported first declarations so that we can be sure // that all redeclarations visible to this module are before D in the // redecl chain. unsigned I = Record.size(); Record.push_back(0); if (Writer.Chain) AddFirstDeclFromEachModule(DAsT, /*IncludeLocal*/false); // This is the number of imported first declarations + 1. Record[I] = Record.size() - I; // Collect the set of local redeclarations of this declaration, from // newest to oldest. 
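// The code that follows gathers the local (non-imported) redeclarations by walking the
// previous-declaration links from the most recent declaration back to, but not
// including, the first local one. A standalone sketch of that walk with a placeholder
// declaration type (FakeDecl is illustrative; FromASTFile marks an imported redecl):
#include <vector>

struct FakeDecl {
  FakeDecl *Previous = nullptr; // next-older declaration in the chain
  bool FromASTFile = false;     // true if this redeclaration was imported
};

// Walk newest -> oldest, stopping at FirstLocal, and keep only the redeclarations
// that live in the current translation unit, newest first.
std::vector<const FakeDecl *>
collectLocalRedecls(const FakeDecl *MostRecent, const FakeDecl *FirstLocal) {
  std::vector<const FakeDecl *> Local;
  for (const FakeDecl *D = MostRecent; D != FirstLocal; D = D->Previous)
    if (!D->FromASTFile)
      Local.push_back(D);
  return Local;
}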
ASTWriter::RecordData LocalRedecls; ASTRecordWriter LocalRedeclWriter(Record, LocalRedecls); for (const Decl *Prev = FirstLocal->getMostRecentDecl(); Prev != FirstLocal; Prev = Prev->getPreviousDecl()) if (!Prev->isFromASTFile()) LocalRedeclWriter.AddDeclRef(Prev); // If we have any redecls, write them now as a separate record preceding // the declaration itself. if (LocalRedecls.empty()) Record.push_back(0); else Record.AddOffset(LocalRedeclWriter.Emit(LOCAL_REDECLARATIONS)); } else { Record.push_back(0); Record.AddDeclRef(FirstLocal); } // Make sure that we serialize both the previous and the most-recent // declarations, which (transitively) ensures that all declarations in the // chain get serialized. // // FIXME: This is not correct; when we reach an imported declaration we // won't emit its previous declaration. (void)Writer.GetDeclRef(D->getPreviousDecl()); (void)Writer.GetDeclRef(MostRecent); } else { // We use the sentinel value 0 to indicate an only declaration. Record.push_back(0); } } void ASTDeclWriter::VisitHLSLBufferDecl(HLSLBufferDecl *D) { VisitNamedDecl(D); VisitDeclContext(D); Record.push_back(D->isCBuffer()); Record.AddSourceLocation(D->getLocStart()); Record.AddSourceLocation(D->getLBraceLoc()); Record.AddSourceLocation(D->getRBraceLoc()); Code = serialization::DECL_HLSL_BUFFER; } void ASTDeclWriter::VisitOMPThreadPrivateDecl(OMPThreadPrivateDecl *D) { Record.writeOMPChildren(D->Data); VisitDecl(D); Code = serialization::DECL_OMP_THREADPRIVATE; } void ASTDeclWriter::VisitOMPAllocateDecl(OMPAllocateDecl *D) { Record.writeOMPChildren(D->Data); VisitDecl(D); Code = serialization::DECL_OMP_ALLOCATE; } void ASTDeclWriter::VisitOMPRequiresDecl(OMPRequiresDecl *D) { Record.writeOMPChildren(D->Data); VisitDecl(D); Code = serialization::DECL_OMP_REQUIRES; } void ASTDeclWriter::VisitOMPDeclareReductionDecl(OMPDeclareReductionDecl *D) { static_assert(DeclContext::NumOMPDeclareReductionDeclBits == 15, "You need to update the serializer after you change the " "NumOMPDeclareReductionDeclBits"); VisitValueDecl(D); Record.AddSourceLocation(D->getBeginLoc()); Record.AddStmt(D->getCombinerIn()); Record.AddStmt(D->getCombinerOut()); Record.AddStmt(D->getCombiner()); Record.AddStmt(D->getInitOrig()); Record.AddStmt(D->getInitPriv()); Record.AddStmt(D->getInitializer()); Record.push_back(llvm::to_underlying(D->getInitializerKind())); Record.AddDeclRef(D->getPrevDeclInScope()); Code = serialization::DECL_OMP_DECLARE_REDUCTION; } void ASTDeclWriter::VisitOMPDeclareMapperDecl(OMPDeclareMapperDecl *D) { Record.writeOMPChildren(D->Data); VisitValueDecl(D); Record.AddDeclarationName(D->getVarName()); Record.AddDeclRef(D->getPrevDeclInScope()); Code = serialization::DECL_OMP_DECLARE_MAPPER; } void ASTDeclWriter::VisitOMPCapturedExprDecl(OMPCapturedExprDecl *D) { VisitVarDecl(D); Code = serialization::DECL_OMP_CAPTUREDEXPR; } //===----------------------------------------------------------------------===// // ASTWriter Implementation //===----------------------------------------------------------------------===// namespace { template std::shared_ptr getFunctionDeclAbbrev(serialization::DeclCode Code) { using namespace llvm; auto Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(Code)); // RedeclarableDecl Abv->Add(BitCodeAbbrevOp(0)); // CanonicalDecl Abv->Add(BitCodeAbbrevOp(Kind)); if constexpr (Kind == FunctionDecl::TK_NonTemplate) { } else if constexpr (Kind == FunctionDecl::TK_FunctionTemplate) { // DescribedFunctionTemplate Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); } else if 
constexpr (Kind == FunctionDecl::TK_DependentNonTemplate) { // Instantiated From Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); } else if constexpr (Kind == FunctionDecl::TK_MemberSpecialization) { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InstantiatedFrom Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // TemplateSpecializationKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Specialized Location } else if constexpr (Kind == FunctionDecl::TK_FunctionTemplateSpecialization) { Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Template Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // TemplateSpecializationKind Abv->Add(BitCodeAbbrevOp(1)); // Template Argument Size Abv->Add(BitCodeAbbrevOp(TemplateArgument::Type)); // Template Argument Kind Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Template Argument Type Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // Is Defaulted Abv->Add(BitCodeAbbrevOp(0)); // TemplateArgumentsAsWritten Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation Abv->Add(BitCodeAbbrevOp(0)); Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Canonical Decl of template } else if constexpr (Kind == FunctionDecl:: TK_DependentFunctionTemplateSpecialization) { // Candidates of specialization Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(0)); // TemplateArgumentsAsWritten } else { llvm_unreachable("Unknown templated kind?"); } // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // Packed DeclBits: ModuleOwnershipKind, // isUsed, isReferenced, AccessSpecifier, // isImplicit // // The following bits should be 0: // HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, // isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(DeclarationName::Identifier)); // NameKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Identifier Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // ValueDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclaratorDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InnerLocStart Abv->Add(BitCodeAbbrevOp(0)); // HasExtInfo Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TSIType // FunctionDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 11)); // IDNS Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, 28)); // Packed Function Bits: StorageClass, Inline, InlineSpecified, // VirtualAsWritten, Pure, HasInheritedProto, HasWrittenProto, // Deleted, Trivial, TrivialForCall, Defaulted, ExplicitlyDefaulted, // IsIneligibleOrNotSelected, ImplicitReturnZero, Constexpr, // UsesSEHTry, SkippedBody, MultiVersion, LateParsed, // FriendConstraintRefersToEnclosingTemplate, Linkage, // ShouldSkipCheckingODR Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LocEnd Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // ODRHash // This Array slurps the rest of the record. Fortunately we want to encode // (nearly) all the remaining (variable number of) fields in the same way. // // This is: // NumParams and Params[] from FunctionDecl, and // NumOverriddenMethods, OverriddenMethods[] from CXXMethodDecl. // // Add an AbbrevOp for 'size then elements' and use it here. 
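// Each abbreviation here declares the exact shape of a record up front so the
// bitstream writer can encode its fields compactly: a literal operand pins a constant
// value, Fixed(n) stores exactly n bits, VBR(n) stores a variable-width integer in
// n-bit chunks, and Array means "element count, then that many elements of the next
// operand's type", which is how a single trailing Array absorbs the variable-length
// tail described in the comment above. A small self-contained sketch in the same
// style; RECORD_CODE and emitExampleAbbrev are placeholders, not names from this file:
#include "llvm/Bitstream/BitCodes.h"
#include "llvm/Bitstream/BitstreamWriter.h"
#include <memory>

enum { RECORD_CODE = 100 }; // hypothetical record code for illustration only

unsigned emitExampleAbbrev(llvm::BitstreamWriter &Stream) {
  auto Abv = std::make_shared<llvm::BitCodeAbbrev>();
  Abv->Add(llvm::BitCodeAbbrevOp(RECORD_CODE));                     // literal record code
  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Fixed, 3)); // small enum, 3 bits
  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));   // usually-small integer
  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Array));    // element count, then...
  Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6));   // ...that many VBR6 values
  // Records emitted with the returned abbreviation ID must match this layout exactly;
  // records that do not fit simply use the unabbreviated (and larger) encoding.
  return Stream.EmitAbbrev(std::move(Abv));
}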
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); return Abv; } template std::shared_ptr getCXXMethodAbbrev() { return getFunctionDeclAbbrev(serialization::DECL_CXX_METHOD); } } // namespace void ASTWriter::WriteDeclAbbrevs() { using namespace llvm; std::shared_ptr Abv; // Abbreviation for DECL_FIELD Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_FIELD)); // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // Packed DeclBits: ModuleOwnershipKind, // isUsed, isReferenced, AccessSpecifier, // // The following bits should be 0: // isImplicit, HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, // isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // ValueDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclaratorDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InnerStartLoc Abv->Add(BitCodeAbbrevOp(0)); // hasExtInfo Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TSIType // FieldDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isMutable Abv->Add(BitCodeAbbrevOp(0)); // StorageKind // Type Source Info Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc DeclFieldAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_OBJC_IVAR Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_OBJC_IVAR)); // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 12)); // Packed DeclBits: HasStandaloneLexicalDC, // isInvalidDecl, HasAttrs, isImplicit, isUsed, // isReferenced, TopLevelDeclInObjCContainer, // AccessSpecifier, ModuleOwnershipKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // ValueDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclaratorDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InnerStartLoc Abv->Add(BitCodeAbbrevOp(0)); // hasExtInfo Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TSIType // FieldDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isMutable Abv->Add(BitCodeAbbrevOp(0)); // InitStyle // ObjC Ivar Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getAccessControl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getSynthesize // Type Source Info Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc DeclObjCIvarAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_ENUM Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_ENUM)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // Packed DeclBits: ModuleOwnershipKind, // isUsed, isReferenced, AccessSpecifier, // // The following bits should be 0: // isImplicit, HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, // isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext 
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // TypeDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type Ref // TagDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // IdentifierNamespace Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, 9)); // Packed Tag Decl Bits: getTagKind, isCompleteDefinition, // EmbeddedInDeclarator, IsFreeStanding, // isCompleteDefinitionRequired, ExtInfoKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation // EnumDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddTypeRef Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // IntegerType Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getPromotionType Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 20)); // Enum Decl Bits Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));// ODRHash Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InstantiatedMembEnum // DC Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LexicalOffset Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // VisibleOffset DeclEnumAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_RECORD Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_RECORD)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // Packed DeclBits: ModuleOwnershipKind, // isUsed, isReferenced, AccessSpecifier, // // The following bits should be 0: // isImplicit, HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, // isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // TypeDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type Ref // TagDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // IdentifierNamespace Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, 9)); // Packed Tag Decl Bits: getTagKind, isCompleteDefinition, // EmbeddedInDeclarator, IsFreeStanding, // isCompleteDefinitionRequired, ExtInfoKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SourceLocation // RecordDecl Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, 13)); // Packed Record Decl Bits: FlexibleArrayMember, // AnonymousStructUnion, hasObjectMember, hasVolatileMember, // isNonTrivialToPrimitiveDefaultInitialize, // isNonTrivialToPrimitiveCopy, isNonTrivialToPrimitiveDestroy, // hasNonTrivialToPrimitiveDefaultInitializeCUnion, // hasNonTrivialToPrimitiveDestructCUnion, // hasNonTrivialToPrimitiveCopyCUnion, isParamDestroyedInCallee, // getArgPassingRestrictions // ODRHash Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 26)); // DC Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LexicalOffset Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // VisibleOffset DeclRecordAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for 
DECL_PARM_VAR Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_PARM_VAR)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // Packed DeclBits: ModuleOwnershipKind, isUsed, // isReferenced, AccessSpecifier, // HasStandaloneLexicalDC, HasAttrs, isImplicit, // TopLevelDeclInObjCContainer, // isInvalidDecl, Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // ValueDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclaratorDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InnerStartLoc Abv->Add(BitCodeAbbrevOp(0)); // hasExtInfo Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TSIType // VarDecl Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 12)); // Packed Var Decl bits: SClass, TSCSpec, InitStyle, // isARCPseudoStrong, Linkage, ModulesCodegen Abv->Add(BitCodeAbbrevOp(0)); // VarKind (local enum) // ParmVarDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ScopeIndex Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, 19)); // Packed Parm Var Decl bits: IsObjCMethodParameter, ScopeDepth, // ObjCDeclQualifier, KNRPromoted, // HasInheritedDefaultArg, HasUninstantiatedDefaultArg // Type Source Info Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc DeclParmVarAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_TYPEDEF Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_TYPEDEF)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // Packed DeclBits: ModuleOwnershipKind, // isReferenced, isUsed, AccessSpecifier. 
Other // higher bits should be 0: isImplicit, // HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // TypeDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type Ref // TypedefDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc DeclTypedefAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_VAR Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_VAR)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 12)); // Packed DeclBits: HasStandaloneLexicalDC, // isInvalidDecl, HasAttrs, isImplicit, isUsed, // isReferenced, TopLevelDeclInObjCContainer, // AccessSpecifier, ModuleOwnershipKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // AnonDeclNumber // ValueDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclaratorDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InnerStartLoc Abv->Add(BitCodeAbbrevOp(0)); // hasExtInfo Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TSIType // VarDecl Abv->Add(BitCodeAbbrevOp( BitCodeAbbrevOp::Fixed, 21)); // Packed Var Decl bits: Linkage, ModulesCodegen, // SClass, TSCSpec, InitStyle, // isARCPseudoStrong, IsThisDeclarationADemotedDefinition, // isExceptionVariable, isNRVOVariable, isCXXForRangeDecl, // isInline, isInlineSpecified, isConstexpr, // isInitCapture, isPrevDeclInSameScope, // EscapingByref, HasDeducedType, ImplicitParamKind, isObjCForDecl Abv->Add(BitCodeAbbrevOp(0)); // VarKind (local enum) // Type Source Info Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TypeLoc DeclVarAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_CXX_METHOD DeclCXXMethodAbbrev = Stream.EmitAbbrev(getCXXMethodAbbrev()); DeclTemplateCXXMethodAbbrev = Stream.EmitAbbrev( getCXXMethodAbbrev()); DeclDependentNonTemplateCXXMethodAbbrev = Stream.EmitAbbrev( getCXXMethodAbbrev()); DeclMemberSpecializedCXXMethodAbbrev = Stream.EmitAbbrev( getCXXMethodAbbrev()); DeclTemplateSpecializedCXXMethodAbbrev = Stream.EmitAbbrev( getCXXMethodAbbrev()); DeclDependentSpecializationCXXMethodAbbrev = Stream.EmitAbbrev( getCXXMethodAbbrev< FunctionDecl::TK_DependentFunctionTemplateSpecialization>()); // Abbreviation for DECL_TEMPLATE_TYPE_PARM Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_TEMPLATE_TYPE_PARM)); Abv->Add(BitCodeAbbrevOp(0)); // hasTypeConstraint // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // Packed DeclBits: ModuleOwnershipKind, // isReferenced, isUsed, AccessSpecifier. 
Other // higher bits should be 0: isImplicit, // HasStandaloneLexicalDC, HasAttrs, // TopLevelDeclInObjCContainer, isInvalidDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // TypeDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type Ref // TemplateTypeParmDecl Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // wasDeclaredWithTypename + Abv->Add(BitCodeAbbrevOp(0)); // TypeConstraintInitialized Abv->Add(BitCodeAbbrevOp(0)); // OwnsDefaultArg DeclTemplateTypeParmAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for DECL_USING_SHADOW Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_USING_SHADOW)); // Redeclarable Abv->Add(BitCodeAbbrevOp(0)); // No redeclaration // Decl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 12)); // Packed DeclBits: HasStandaloneLexicalDC, // isInvalidDecl, HasAttrs, isImplicit, isUsed, // isReferenced, TopLevelDeclInObjCContainer, // AccessSpecifier, ModuleOwnershipKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclContext Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // SubmoduleID // NamedDecl Abv->Add(BitCodeAbbrevOp(0)); // NameKind = Identifier Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Name Abv->Add(BitCodeAbbrevOp(0)); // UsingShadowDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // TargetDecl Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 11)); // IDNS Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // UsingOrNextShadow Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // InstantiatedFromUsingShadowDecl DeclUsingShadowAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_DECL_REF Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_DECL_REF)); // Stmt // Expr // PackingBits: DependenceKind, ValueKind. ObjectKind should be 0. Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // DeclRefExpr // Packing Bits: , HadMultipleCandidates, RefersToEnclosingVariableOrCapture, // IsImmediateEscalating, NonOdrUseReason. // GetDeclFound, HasQualifier and ExplicitTemplateArgs should be 0. 
Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // DeclRef Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location DeclRefExprAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_INTEGER_LITERAL Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_INTEGER_LITERAL)); //Stmt // Expr // DependenceKind, ValueKind, ObjectKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // Integer Literal Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location Abv->Add(BitCodeAbbrevOp(32)); // Bit Width Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Value IntegerLiteralAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_CHARACTER_LITERAL Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_CHARACTER_LITERAL)); //Stmt // Expr // DependenceKind, ValueKind, ObjectKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // Character Literal Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // getValue Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // getKind CharacterLiteralAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_IMPLICIT_CAST Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_IMPLICIT_CAST)); // Stmt // Expr // Packing Bits: DependenceKind, ValueKind, ObjectKind, Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // CastExpr Abv->Add(BitCodeAbbrevOp(0)); // PathSize // Packing Bits: CastKind, StoredFPFeatures, isPartOfExplicitCast Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 9)); // ImplicitCastExpr ExprImplicitCastAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_BINARY_OPERATOR Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_BINARY_OPERATOR)); // Stmt // Expr // Packing Bits: DependenceKind. ValueKind and ObjectKind should // be 0 in this case. Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // BinaryOperator Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpCode and HasFPFeatures Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location BinaryOperatorAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_COMPOUND_ASSIGN_OPERATOR Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_COMPOUND_ASSIGN_OPERATOR)); // Stmt // Expr // Packing Bits: DependenceKind. ValueKind and ObjectKind should // be 0 in this case. Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // BinaryOperator // Packing Bits: OpCode. 
The HasFPFeatures bit should be 0 Abv->Add( BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpCode and HasFPFeatures Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location // CompoundAssignOperator Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHSType Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Result Type CompoundAssignOperatorAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_CALL Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_CALL)); // Stmt // Expr // Packing Bits: DependenceKind, ValueKind, ObjectKind, Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // CallExpr Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // NumArgs Abv->Add(BitCodeAbbrevOp(0)); // ADLCallKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location CallExprAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_CXX_OPERATOR_CALL Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_CXX_OPERATOR_CALL)); // Stmt // Expr // Packing Bits: DependenceKind, ValueKind, ObjectKind, Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // CallExpr Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // NumArgs Abv->Add(BitCodeAbbrevOp(0)); // ADLCallKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location // CXXOperatorCallExpr Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Operator Kind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location CXXOperatorCallExprAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for EXPR_CXX_MEMBER_CALL Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::EXPR_CXX_MEMBER_CALL)); // Stmt // Expr // Packing Bits: DependenceKind, ValueKind, ObjectKind, Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 10)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Type // CallExpr Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // NumArgs Abv->Add(BitCodeAbbrevOp(0)); // ADLCallKind Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location // CXXMemberCallExpr CXXMemberCallExprAbbrev = Stream.EmitAbbrev(std::move(Abv)); // Abbreviation for STMT_COMPOUND Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::STMT_COMPOUND)); // Stmt // CompoundStmt Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Num Stmts Abv->Add(BitCodeAbbrevOp(0)); // hasStoredFPFeatures Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Source Location CompoundStmtAbbrev = Stream.EmitAbbrev(std::move(Abv)); Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_CONTEXT_LEXICAL)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); DeclContextLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv)); Abv = std::make_shared(); Abv->Add(BitCodeAbbrevOp(serialization::DECL_CONTEXT_VISIBLE)); Abv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); DeclContextVisibleLookupAbbrev = Stream.EmitAbbrev(std::move(Abv)); } /// isRequiredDecl - Check if this is a "required" Decl, which must be seen by /// consumers of the AST. /// /// Such decls will always be deserialized from the AST file, so we would like /// this to be as restrictive as possible. 
Currently the predicate is driven by /// code generation requirements, if other clients have a different notion of /// what is "required" then we may have to consider an alternate scheme where /// clients can iterate over the top-level decls and get information on them, /// without necessary deserializing them. We could explicitly require such /// clients to use a separate API call to "realize" the decl. This should be /// relatively painless since they would presumably only do it for top-level /// decls. static bool isRequiredDecl(const Decl *D, ASTContext &Context, Module *WritingModule) { // Named modules have different semantics than header modules. Every named // module units owns a translation unit. So the importer of named modules // doesn't need to deserilize everything ahead of time. if (WritingModule && WritingModule->isNamedModule()) { // The PragmaCommentDecl and PragmaDetectMismatchDecl are MSVC's extension. // And the behavior of MSVC for such cases will leak this to the module // users. Given pragma is not a standard thing, the compiler has the space // to do their own decision. Let's follow MSVC here. if (isa(D)) return true; return false; } // An ObjCMethodDecl is never considered as "required" because its // implementation container always is. // File scoped assembly or obj-c or OMP declare target implementation must be // seen. if (isa(D)) return true; if (WritingModule && isPartOfPerModuleInitializer(D)) { // These declarations are part of the module initializer, and are emitted // if and when the module is imported, rather than being emitted eagerly. return false; } return Context.DeclMustBeEmitted(D); } void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) { PrettyDeclStackTraceEntry CrashInfo(Context, D, SourceLocation(), "serializing"); // Determine the ID for this declaration. LocalDeclID ID; assert(!D->isFromASTFile() && "should not be emitting imported decl"); LocalDeclID &IDR = DeclIDs[D]; if (IDR.isInvalid()) IDR = NextDeclID++; ID = IDR; assert(ID >= FirstDeclID && "invalid decl ID"); RecordData Record; ASTDeclWriter W(*this, Context, Record, GeneratingReducedBMI); // Build a record for this declaration W.Visit(D); // Emit this declaration to the bitstream. uint64_t Offset = W.Emit(D); // Record the offset for this declaration SourceLocation Loc = D->getLocation(); SourceLocationEncoding::RawLocEncoding RawLoc = getRawSourceLocationEncoding(getAdjustedLocation(Loc)); unsigned Index = ID.getRawValue() - FirstDeclID.getRawValue(); if (DeclOffsets.size() == Index) DeclOffsets.emplace_back(RawLoc, Offset, DeclTypesBlockStartOffset); else if (DeclOffsets.size() < Index) { // FIXME: Can/should this happen? DeclOffsets.resize(Index+1); DeclOffsets[Index].setRawLoc(RawLoc); DeclOffsets[Index].setBitOffset(Offset, DeclTypesBlockStartOffset); } else { llvm_unreachable("declarations should be emitted in ID order"); } SourceManager &SM = Context.getSourceManager(); if (Loc.isValid() && SM.isLocalSourceLocation(Loc)) associateDeclWithFile(D, ID); // Note declarations that should be deserialized eagerly so that we can add // them to a record in the AST file later. if (isRequiredDecl(D, Context, WritingModule)) AddDeclRef(D, EagerlyDeserializedDecls); } void ASTRecordWriter::AddFunctionDefinition(const FunctionDecl *FD) { // Switch case IDs are per function body. 
Writer->ClearSwitchCaseIDs(); assert(FD->doesThisDeclarationHaveABody()); bool ModulesCodegen = false; if (!FD->isDependentContext()) { std::optional Linkage; if (Writer->WritingModule && Writer->WritingModule->isInterfaceOrPartition()) { // When building a C++20 module interface unit or a partition unit, a // strong definition in the module interface is provided by the // compilation of that unit, not by its users. (Inline functions are still // emitted in module users.) Linkage = Writer->Context->GetGVALinkageForFunction(FD); ModulesCodegen = *Linkage >= GVA_StrongExternal; } if (Writer->Context->getLangOpts().ModulesCodegen || (FD->hasAttr() && Writer->Context->getLangOpts().BuildingPCHWithObjectFile)) { // Under -fmodules-codegen, codegen is performed for all non-internal, // non-always_inline functions, unless they are available elsewhere. if (!FD->hasAttr()) { if (!Linkage) Linkage = Writer->Context->GetGVALinkageForFunction(FD); ModulesCodegen = *Linkage != GVA_Internal && *Linkage != GVA_AvailableExternally; } } } Record->push_back(ModulesCodegen); if (ModulesCodegen) Writer->AddDeclRef(FD, Writer->ModularCodegenDecls); if (auto *CD = dyn_cast(FD)) { Record->push_back(CD->getNumCtorInitializers()); if (CD->getNumCtorInitializers()) AddCXXCtorInitializers(llvm::ArrayRef(CD->init_begin(), CD->init_end())); } AddStmt(FD->getBody()); } diff --git a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp index eeb49e2afe34..80b8158f43db 100644 --- a/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp +++ b/contrib/llvm-project/compiler-rt/lib/sanitizer_common/sanitizer_procmaps_solaris.cpp @@ -1,92 +1,96 @@ //===-- sanitizer_procmaps_solaris.cpp ------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Information about the process mappings (Solaris-specific parts). //===----------------------------------------------------------------------===// // Before Solaris 11.4, doesn't work in a largefile environment. #undef _FILE_OFFSET_BITS + +// Avoid conflict between `_TIME_BITS` defined vs. `_FILE_OFFSET_BITS` +// undefined in some Linux configurations. +#undef _TIME_BITS #include "sanitizer_platform.h" #if SANITIZER_SOLARIS # include # include # include # include "sanitizer_common.h" # include "sanitizer_procmaps.h" namespace __sanitizer { void ReadProcMaps(ProcSelfMapsBuff *proc_maps) { uptr fd = internal_open("/proc/self/xmap", O_RDONLY); CHECK_NE(fd, -1); uptr Size = internal_filesize(fd); CHECK_GT(Size, 0); // Allow for additional entries by following mmap. 
size_t MmapedSize = Size * 4 / 3; void *VmMap = MmapOrDie(MmapedSize, "ReadProcMaps()"); Size = internal_read(fd, VmMap, MmapedSize); CHECK_NE(Size, -1); internal_close(fd); proc_maps->data = (char *)VmMap; proc_maps->mmaped_size = MmapedSize; proc_maps->len = Size; } bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) { if (Error()) return false; // simulate empty maps char *last = data_.proc_self_maps.data + data_.proc_self_maps.len; if (data_.current >= last) return false; prxmap_t *xmapentry = const_cast(reinterpret_cast(data_.current)); segment->start = (uptr)xmapentry->pr_vaddr; segment->end = (uptr)(xmapentry->pr_vaddr + xmapentry->pr_size); segment->offset = (uptr)xmapentry->pr_offset; segment->protection = 0; if ((xmapentry->pr_mflags & MA_READ) != 0) segment->protection |= kProtectionRead; if ((xmapentry->pr_mflags & MA_WRITE) != 0) segment->protection |= kProtectionWrite; if ((xmapentry->pr_mflags & MA_EXEC) != 0) segment->protection |= kProtectionExecute; if ((xmapentry->pr_mflags & MA_SHARED) != 0) segment->protection |= kProtectionShared; if (segment->filename != NULL && segment->filename_size > 0) { char proc_path[PATH_MAX + 1]; // Avoid unnecessary readlink on unnamed entires. if (xmapentry->pr_mapname[0] == '\0') segment->filename[0] = '\0'; else { internal_snprintf(proc_path, sizeof(proc_path), "/proc/self/path/%s", xmapentry->pr_mapname); ssize_t sz = internal_readlink(proc_path, segment->filename, segment->filename_size - 1); // If readlink failed, the map is anonymous. if (sz == -1) segment->filename[0] = '\0'; else if ((size_t)sz < segment->filename_size) // readlink doesn't NUL-terminate. segment->filename[sz] = '\0'; } } data_.current += sizeof(prxmap_t); return true; } } // namespace __sanitizer #endif // SANITIZER_SOLARIS diff --git a/contrib/llvm-project/libcxx/include/__config b/contrib/llvm-project/libcxx/include/__config index 87b6d9d19e83..7d735e5b6601 100644 --- a/contrib/llvm-project/libcxx/include/__config +++ b/contrib/llvm-project/libcxx/include/__config @@ -1,1253 +1,1253 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _LIBCPP___CONFIG #define _LIBCPP___CONFIG #include <__config_site> #include <__configuration/abi.h> #include <__configuration/availability.h> #include <__configuration/compiler.h> #include <__configuration/platform.h> #ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER # pragma GCC system_header #endif #ifdef __cplusplus // The attributes supported by clang are documented at https://clang.llvm.org/docs/AttributeReference.html // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM. // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is // defined to XXYYZZ. -# define _LIBCPP_VERSION 190102 +# define _LIBCPP_VERSION 190103 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) # if __STDC_HOSTED__ == 0 # define _LIBCPP_FREESTANDING # endif // HARDENING { // This is for backward compatibility -- make enabling `_LIBCPP_ENABLE_ASSERTIONS` (which predates hardening modes) // equivalent to setting the extensive mode. This is deprecated and will be removed in LLVM 20. 
# ifdef _LIBCPP_ENABLE_ASSERTIONS # warning "_LIBCPP_ENABLE_ASSERTIONS is deprecated, please use _LIBCPP_HARDENING_MODE instead" # if _LIBCPP_ENABLE_ASSERTIONS != 0 && _LIBCPP_ENABLE_ASSERTIONS != 1 # error "_LIBCPP_ENABLE_ASSERTIONS must be set to 0 or 1" # endif # if _LIBCPP_ENABLE_ASSERTIONS # define _LIBCPP_HARDENING_MODE _LIBCPP_HARDENING_MODE_EXTENSIVE # endif # endif // The library provides the macro `_LIBCPP_HARDENING_MODE` which can be set to one of the following values: // // - `_LIBCPP_HARDENING_MODE_NONE`; // - `_LIBCPP_HARDENING_MODE_FAST`; // - `_LIBCPP_HARDENING_MODE_EXTENSIVE`; // - `_LIBCPP_HARDENING_MODE_DEBUG`. // // These values have the following effects: // // - `_LIBCPP_HARDENING_MODE_NONE` -- sets the hardening mode to "none" which disables all runtime hardening checks; // // - `_LIBCPP_HARDENING_MODE_FAST` -- sets that hardening mode to "fast". The fast mode enables security-critical checks // that can be done with relatively little runtime overhead in constant time; // // - `_LIBCPP_HARDENING_MODE_EXTENSIVE` -- sets the hardening mode to "extensive". The extensive mode is a superset of // the fast mode that additionally enables checks that are relatively cheap and prevent common types of logic errors // but are not necessarily security-critical; // // - `_LIBCPP_HARDENING_MODE_DEBUG` -- sets the hardening mode to "debug". The debug mode is a superset of the extensive // mode and enables all checks available in the library, including internal assertions. Checks that are part of the // debug mode can be very expensive and thus the debug mode is intended to be used for testing, not in production. // Inside the library, assertions are categorized so they can be cherry-picked based on the chosen hardening mode. These // macros are only for internal use -- users should only pick one of the high-level hardening modes described above. // // - `_LIBCPP_ASSERT_VALID_INPUT_RANGE` -- checks that ranges (whether expressed as an iterator pair, an iterator and // a sentinel, an iterator and a count, or a `std::range`) given as input to library functions are valid: // - the sentinel is reachable from the begin iterator; // - TODO(hardening): both iterators refer to the same container. // // - `_LIBCPP_ASSERT_VALID_ELEMENT_ACCESS` -- checks that any attempts to access a container element, whether through // the container object or through an iterator, are valid and do not attempt to go out of bounds or otherwise access // a non-existent element. For iterator checks to work, bounded iterators must be enabled in the ABI. Types like // `optional` and `function` are considered one-element containers for the purposes of this check. // // - `_LIBCPP_ASSERT_NON_NULL` -- checks that the pointer being dereferenced is not null. On most modern platforms zero // address does not refer to an actual location in memory, so a null pointer dereference would not compromize the // memory security of a program (however, it is still undefined behavior that can result in strange errors due to // compiler optimizations). // // - `_LIBCPP_ASSERT_NON_OVERLAPPING_RANGES` -- for functions that take several ranges as arguments, checks that the // given ranges do not overlap. // // - `_LIBCPP_ASSERT_VALID_DEALLOCATION` -- checks that an attempt to deallocate memory is valid (e.g. the given object // was allocated by the given allocator). Violating this category typically results in a memory leak. 
// // - `_LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL` -- checks that a call to an external API doesn't fail in // an unexpected manner. This includes triggering documented cases of undefined behavior in an external library (like // attempting to unlock an unlocked mutex in pthreads). Any API external to the library falls under this category // (from system calls to compiler intrinsics). We generally don't expect these failures to compromize memory safety or // otherwise create an immediate security issue. // // - `_LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR` -- checks any operations that exchange nodes between containers to make sure // the containers have compatible allocators. // // - `_LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN` -- checks that the given argument is within the domain of valid arguments // for the function. Violating this typically produces an incorrect result (e.g. the clamp algorithm returns the // original value without clamping it due to incorrect functors) or puts an object into an invalid state (e.g. // a string view where only a subset of elements is possible to access). This category is for assertions violating // which doesn't cause any immediate issues in the library -- whatever the consequences are, they will happen in the // user code. // // - `_LIBCPP_ASSERT_PEDANTIC` -- checks prerequisites which are imposed by the Standard, but violating which happens to // be benign in our implementation. // // - `_LIBCPP_ASSERT_SEMANTIC_REQUIREMENT` -- checks that the given argument satisfies the semantic requirements imposed // by the Standard. Typically, there is no simple way to completely prove that a semantic requirement is satisfied; // thus, this would often be a heuristic check and it might be quite expensive. // // - `_LIBCPP_ASSERT_INTERNAL` -- checks that internal invariants of the library hold. These assertions don't depend on // user input. // // - `_LIBCPP_ASSERT_UNCATEGORIZED` -- for assertions that haven't been properly classified yet. // clang-format off # define _LIBCPP_HARDENING_MODE_NONE (1 << 1) # define _LIBCPP_HARDENING_MODE_FAST (1 << 2) # define _LIBCPP_HARDENING_MODE_EXTENSIVE (1 << 4) // Deliberately not ordered. # define _LIBCPP_HARDENING_MODE_DEBUG (1 << 3) // clang-format on # ifndef _LIBCPP_HARDENING_MODE # ifndef _LIBCPP_HARDENING_MODE_DEFAULT # error _LIBCPP_HARDENING_MODE_DEFAULT is not defined. This definition should be set at configuration time in the \ `__config_site` header, please make sure your installation of libc++ is not broken. 
# endif # define _LIBCPP_HARDENING_MODE _LIBCPP_HARDENING_MODE_DEFAULT # endif # if _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_NONE && \ _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_FAST && \ _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_EXTENSIVE && \ _LIBCPP_HARDENING_MODE != _LIBCPP_HARDENING_MODE_DEBUG # error _LIBCPP_HARDENING_MODE must be set to one of the following values: \ _LIBCPP_HARDENING_MODE_NONE, \ _LIBCPP_HARDENING_MODE_FAST, \ _LIBCPP_HARDENING_MODE_EXTENSIVE, \ _LIBCPP_HARDENING_MODE_DEBUG # endif // } HARDENING # define _LIBCPP_TOSTRING2(x) #x # define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x) // NOLINTNEXTLINE(libcpp-cpp-version-check) # if __cplusplus < 201103L # define _LIBCPP_CXX03_LANG # endif // TODO: Remove once we switch to GCC 14 # ifndef __has_extension # define __has_extension(__x) 0 # endif // TODO: Remove once we switch to GCC 14 # ifndef __has_feature # define __has_feature(__x) 0 # endif # ifndef __has_constexpr_builtin # define __has_constexpr_builtin(x) 0 # endif // This checks wheter a Clang module is built # ifndef __building_module # define __building_module(...) 0 # endif // '__is_identifier' returns '0' if '__x' is a reserved identifier provided by // the compiler and '1' otherwise. # ifndef __is_identifier # define __is_identifier(__x) 1 # endif # ifndef __has_declspec_attribute # define __has_declspec_attribute(__x) 0 # endif # define __has_keyword(__x) !(__is_identifier(__x)) # ifndef __has_warning # define __has_warning(...) 0 # endif # if !defined(_LIBCPP_COMPILER_CLANG_BASED) && __cplusplus < 201103L # error "libc++ only supports C++03 with Clang-based compilers. Please enable C++11" # endif // FIXME: ABI detection should be done via compiler builtin macros. This // is just a placeholder until Clang implements such macros. For now assume // that Windows compilers pretending to be MSVC++ target the Microsoft ABI, // and allow the user to explicitly specify the ABI to handle cases where this // heuristic falls short. # if defined(_LIBCPP_ABI_FORCE_ITANIUM) && defined(_LIBCPP_ABI_FORCE_MICROSOFT) # error "Only one of _LIBCPP_ABI_FORCE_ITANIUM and _LIBCPP_ABI_FORCE_MICROSOFT can be defined" # elif defined(_LIBCPP_ABI_FORCE_ITANIUM) # define _LIBCPP_ABI_ITANIUM # elif defined(_LIBCPP_ABI_FORCE_MICROSOFT) # define _LIBCPP_ABI_MICROSOFT # else # if defined(_WIN32) && defined(_MSC_VER) # define _LIBCPP_ABI_MICROSOFT # else # define _LIBCPP_ABI_ITANIUM # endif # endif # if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_NO_VCRUNTIME) # define _LIBCPP_ABI_VCRUNTIME # endif # if __has_feature(experimental_library) # ifndef _LIBCPP_ENABLE_EXPERIMENTAL # define _LIBCPP_ENABLE_EXPERIMENTAL # endif # endif // Incomplete features get their own specific disabling flags. This makes it // easier to grep for target specific flags once the feature is complete. # if !defined(_LIBCPP_ENABLE_EXPERIMENTAL) && !defined(_LIBCPP_BUILDING_LIBRARY) # define _LIBCPP_HAS_NO_INCOMPLETE_PSTL # define _LIBCPP_HAS_NO_EXPERIMENTAL_STOP_TOKEN # define _LIBCPP_HAS_NO_EXPERIMENTAL_TZDB # define _LIBCPP_HAS_NO_EXPERIMENTAL_SYNCSTREAM # endif # if defined(__MVS__) # include // for __NATIVE_ASCII_F # endif # if defined(_WIN32) # define _LIBCPP_WIN32API # define _LIBCPP_SHORT_WCHAR 1 // Both MinGW and native MSVC provide a "MSVC"-like environment # define _LIBCPP_MSVCRT_LIKE // If mingw not explicitly detected, assume using MS C runtime only if // a MS compatibility version is specified. 
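// Rough decision table for the block below (assuming MSVC and clang-cl both
// define _MSC_VER, and MinGW defines __MINGW32__):
//   MSVC / clang-cl : _LIBCPP_MSVCRT_LIKE and _LIBCPP_MSVCRT
//   MinGW           : _LIBCPP_MSVCRT_LIKE only (an MSVC-like environment, but
//                     not Microsoft's C runtime import libraries)
//   non-Windows     : neither (the enclosing #if defined(_WIN32) is false)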
# if defined(_MSC_VER) && !defined(__MINGW32__) # define _LIBCPP_MSVCRT // Using Microsoft's C Runtime library # endif # if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__)) # define _LIBCPP_HAS_BITSCAN64 # endif # define _LIBCPP_HAS_OPEN_WITH_WCHAR # endif // defined(_WIN32) # if defined(_AIX) && !defined(__64BIT__) // The size of wchar is 2 byte on 32-bit mode on AIX. # define _LIBCPP_SHORT_WCHAR 1 # endif // Libc++ supports various implementations of std::random_device. // // _LIBCPP_USING_DEV_RANDOM // Read entropy from the given file, by default `/dev/urandom`. // If a token is provided, it is assumed to be the path to a file // to read entropy from. This is the default behavior if nothing // else is specified. This implementation requires storing state // inside `std::random_device`. // // _LIBCPP_USING_ARC4_RANDOM // Use arc4random(). This allows obtaining random data even when // using sandboxing mechanisms. On some platforms like Apple, this // is the recommended source of entropy for user-space programs. // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. // // _LIBCPP_USING_GETENTROPY // Use getentropy(). // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. // // _LIBCPP_USING_FUCHSIA_CPRNG // Use Fuchsia's zx_cprng_draw() system call, which is specified to // deliver high-quality entropy and cannot fail. // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. // // _LIBCPP_USING_NACL_RANDOM // NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, // including accesses to the special files under `/dev`. This implementation // uses the NaCL syscall `nacl_secure_random_init()` to get entropy. // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. // // _LIBCPP_USING_WIN32_RANDOM // Use rand_s(), for use on Windows. // When this option is used, the token passed to `std::random_device`'s // constructor *must* be "/dev/urandom" -- anything else is an error. # if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ defined(__DragonFly__) # define _LIBCPP_USING_ARC4_RANDOM # elif defined(__wasi__) || defined(__EMSCRIPTEN__) # define _LIBCPP_USING_GETENTROPY # elif defined(__Fuchsia__) # define _LIBCPP_USING_FUCHSIA_CPRNG # elif defined(__native_client__) # define _LIBCPP_USING_NACL_RANDOM # elif defined(_LIBCPP_WIN32API) # define _LIBCPP_USING_WIN32_RANDOM # else # define _LIBCPP_USING_DEV_RANDOM # endif # ifndef _LIBCPP_CXX03_LANG # define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp) # define _ALIGNAS_TYPE(x) alignas(x) # define _ALIGNAS(x) alignas(x) # define _LIBCPP_NORETURN [[noreturn]] # define _NOEXCEPT noexcept # define _NOEXCEPT_(...) noexcept(__VA_ARGS__) # define _LIBCPP_CONSTEXPR constexpr # else # define _LIBCPP_ALIGNOF(_Tp) _Alignof(_Tp) # define _ALIGNAS_TYPE(x) __attribute__((__aligned__(_LIBCPP_ALIGNOF(x)))) # define _ALIGNAS(x) __attribute__((__aligned__(x))) # define _LIBCPP_NORETURN __attribute__((__noreturn__)) # define _LIBCPP_HAS_NO_NOEXCEPT # define nullptr __nullptr # define _NOEXCEPT throw() # define _NOEXCEPT_(...) # define static_assert(...) _Static_assert(__VA_ARGS__) # define decltype(...) 
__decltype(__VA_ARGS__) # define _LIBCPP_CONSTEXPR typedef __char16_t char16_t; typedef __char32_t char32_t; # endif # define _LIBCPP_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp) # if defined(_LIBCPP_COMPILER_CLANG_BASED) // Objective-C++ features (opt-in) # if __has_feature(objc_arc) # define _LIBCPP_HAS_OBJC_ARC # endif # if __has_feature(objc_arc_weak) # define _LIBCPP_HAS_OBJC_ARC_WEAK # endif # if __has_extension(blocks) # define _LIBCPP_HAS_EXTENSION_BLOCKS # endif # if defined(_LIBCPP_HAS_EXTENSION_BLOCKS) && defined(__APPLE__) # define _LIBCPP_HAS_BLOCKS_RUNTIME # endif # if !__has_feature(address_sanitizer) # define _LIBCPP_HAS_NO_ASAN # endif # define _LIBCPP_ALWAYS_INLINE __attribute__((__always_inline__)) # define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__ # elif defined(_LIBCPP_COMPILER_GCC) # if !defined(__SANITIZE_ADDRESS__) # define _LIBCPP_HAS_NO_ASAN # endif # define _LIBCPP_ALWAYS_INLINE __attribute__((__always_inline__)) # define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__ # endif // _LIBCPP_COMPILER_[CLANG|GCC] # if defined(_LIBCPP_OBJECT_FORMAT_COFF) # ifdef _DLL # define _LIBCPP_CRT_FUNC __declspec(dllimport) # else # define _LIBCPP_CRT_FUNC # endif # if defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) || (defined(__MINGW32__) && !defined(_LIBCPP_BUILDING_LIBRARY)) # define _LIBCPP_DLL_VIS # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # define _LIBCPP_OVERRIDABLE_FUNC_VIS # define _LIBCPP_EXPORTED_FROM_ABI # elif defined(_LIBCPP_BUILDING_LIBRARY) # define _LIBCPP_DLL_VIS __declspec(dllexport) # if defined(__MINGW32__) # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # else # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS _LIBCPP_DLL_VIS # endif # define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_DLL_VIS # define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllexport) # else # define _LIBCPP_DLL_VIS __declspec(dllimport) # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS # define _LIBCPP_OVERRIDABLE_FUNC_VIS # define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllimport) # endif # define _LIBCPP_HIDDEN # define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS # define _LIBCPP_TEMPLATE_VIS # define _LIBCPP_TEMPLATE_DATA_VIS # define _LIBCPP_TYPE_VISIBILITY_DEFAULT # else # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) # define _LIBCPP_VISIBILITY(vis) __attribute__((__visibility__(vis))) # else # define _LIBCPP_VISIBILITY(vis) # endif # define _LIBCPP_HIDDEN _LIBCPP_VISIBILITY("hidden") # define _LIBCPP_TEMPLATE_DATA_VIS _LIBCPP_VISIBILITY("default") # define _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_VISIBILITY("default") # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_VISIBILITY("default") # define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS // TODO: Make this a proper customization point or remove the option to override it. 
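// Usage sketch for the visibility macros defined in this block (hypothetical
// declarations, not taken from the headers): class templates carry
// _LIBCPP_TEMPLATE_VIS, while functions and types that are part of the built
// library's ABI carry _LIBCPP_EXPORTED_FROM_ABI.
//
//   template <class _Tp>
//   struct _LIBCPP_TEMPLATE_VIS __example_wrapper {};
//
//   _LIBCPP_EXPORTED_FROM_ABI void __example_exported_function();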
# ifndef _LIBCPP_OVERRIDABLE_FUNC_VIS # define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_VISIBILITY("default") # endif # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) // The inline should be removed once PR32114 is resolved # define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS inline _LIBCPP_HIDDEN # else # define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS # endif // GCC doesn't support the type_visibility attribute, so we have to keep the visibility attribute on templates # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) && !__has_attribute(__type_visibility__) # define _LIBCPP_TEMPLATE_VIS __attribute__((__visibility__("default"))) # else # define _LIBCPP_TEMPLATE_VIS # endif # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) && __has_attribute(__type_visibility__) && \ _LIBCPP_CLANG_VER >= 1500 // FreeBSD customization # define _LIBCPP_TYPE_VISIBILITY_DEFAULT __attribute__((__type_visibility__("default"))) # else # define _LIBCPP_TYPE_VISIBILITY_DEFAULT # endif # endif // defined(_LIBCPP_OBJECT_FORMAT_COFF) # if __has_attribute(exclude_from_explicit_instantiation) # define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION __attribute__((__exclude_from_explicit_instantiation__)) # else // Try to approximate the effect of exclude_from_explicit_instantiation // (which is that entities are not assumed to be provided by explicit // template instantiations in the dylib) by always inlining those entities. # define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION _LIBCPP_ALWAYS_INLINE # endif # ifdef _LIBCPP_COMPILER_CLANG_BASED # define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") # define _LIBCPP_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") # define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(clang diagnostic ignored str)) # define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) # elif defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") # define _LIBCPP_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") # define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) # define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(GCC diagnostic ignored str)) # else # define _LIBCPP_DIAGNOSTIC_PUSH # define _LIBCPP_DIAGNOSTIC_POP # define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) # define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) # endif # if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_FAST # define _LIBCPP_HARDENING_SIG f # elif _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_EXTENSIVE # define _LIBCPP_HARDENING_SIG s # elif _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_HARDENING_SIG d # else # define _LIBCPP_HARDENING_SIG n // "none" # endif # ifdef _LIBCPP_HAS_NO_EXCEPTIONS # define _LIBCPP_EXCEPTIONS_SIG n # else # define _LIBCPP_EXCEPTIONS_SIG e # endif # define _LIBCPP_ODR_SIGNATURE \ _LIBCPP_CONCAT(_LIBCPP_CONCAT(_LIBCPP_HARDENING_SIG, _LIBCPP_EXCEPTIONS_SIG), _LIBCPP_VERSION) // This macro marks a symbol as being hidden from libc++'s ABI. This is achieved // on two levels: // 1. The symbol is given hidden visibility, which ensures that users won't start exporting // symbols from their dynamic library by means of using the libc++ headers. This ensures // that those symbols stay private to the dynamic library in which it is defined. // // 2. The symbol is given an ABI tag that encodes the ODR-relevant properties of the library. // This ensures that no ODR violation can arise from mixing two TUs compiled with different // versions or configurations of libc++ (such as exceptions vs no-exceptions). 
Indeed, if the // program contains two definitions of a function, the ODR requires them to be token-by-token // equivalent, and the linker is allowed to pick either definition and discard the other one. // // For example, if a program contains a copy of `vector::at()` compiled with exceptions enabled // *and* a copy of `vector::at()` compiled with exceptions disabled (by means of having two TUs // compiled with different settings), the two definitions are both visible by the linker and they // have the same name, but they have a meaningfully different implementation (one throws an exception // and the other aborts the program). This violates the ODR and makes the program ill-formed, and in // practice what will happen is that the linker will pick one of the definitions at random and will // discard the other one. This can quite clearly lead to incorrect program behavior. // // A similar reasoning holds for many other properties that are ODR-affecting. Essentially any // property that causes the code of a function to differ from the code in another configuration // can be considered ODR-affecting. In practice, we don't encode all such properties in the ABI // tag, but we encode the ones that we think are most important: library version, exceptions, and // hardening mode. // // Note that historically, solving this problem has been achieved in various ways, including // force-inlining all functions or giving internal linkage to all functions. Both these previous // solutions suffer from drawbacks that lead notably to code bloat. // // Note that we use _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION to ensure that we don't depend // on _LIBCPP_HIDE_FROM_ABI methods of classes explicitly instantiated in the dynamic library. // // Also note that the _LIBCPP_HIDE_FROM_ABI_VIRTUAL macro should be used on virtual functions // instead of _LIBCPP_HIDE_FROM_ABI. That macro does not use an ABI tag. Indeed, the mangled // name of a virtual function is part of its ABI, since some architectures like arm64e can sign // the virtual function pointer in the vtable based on the mangled name of the function. Since // we use an ABI tag that changes with each released version, the mangled name of the virtual // function would change, which is incorrect. Note that it doesn't make much sense to change // the implementation of a virtual function in an ABI-incompatible way in the first place, // since that would be an ABI break anyway. Hence, the lack of ABI tag should not be noticeable. // // The macro can be applied to record and enum types. When the tagged type is nested in // a record this "parent" record needs to have the macro too. Another use case for applying // this macro to records and unions is to apply an ABI tag to inline constexpr variables. // This can be useful for inline variables that are implementation details which are expected // to change in the future. // // TODO: We provide a escape hatch with _LIBCPP_NO_ABI_TAG for folks who want to avoid increasing // the length of symbols with an ABI tag. In practice, we should remove the escape hatch and // use compression mangling instead, see https://github.com/itanium-cxx-abi/cxx-abi/issues/70. 
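// Worked example of the signature defined above (values assumed for
// illustration): with the fast hardening mode ('f'), exceptions enabled ('e')
// and _LIBCPP_VERSION 190103, _LIBCPP_ODR_SIGNATURE expands to the token
// fe190103, and _LIBCPP_TOSTRING(_LIBCPP_ODR_SIGNATURE) yields the string
// "fe190103" that is attached as the ABI tag by the definition below.
// A typical (hypothetical) use of the resulting macro looks like
//
//   _LIBCPP_HIDE_FROM_ABI inline int __example_helper(int __x) { return __x + 1; }
//
// giving the helper hidden visibility, exclusion from explicit instantiations,
// and the configuration-specific ABI tag all at once.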
# ifndef _LIBCPP_NO_ABI_TAG # define _LIBCPP_HIDE_FROM_ABI \ _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION \ __attribute__((__abi_tag__(_LIBCPP_TOSTRING(_LIBCPP_ODR_SIGNATURE)))) # else # define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION # endif # define _LIBCPP_HIDE_FROM_ABI_VIRTUAL _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION # ifdef _LIBCPP_BUILDING_LIBRARY # if _LIBCPP_ABI_VERSION > 1 # define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI # else # define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 # endif # else # define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI # endif // TODO: Remove this workaround once we drop support for Clang 16 # if __has_warning("-Wc++23-extensions") # define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++23-extensions") # else # define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++2b-extensions") # endif // Clang modules take a significant compile time hit when pushing and popping diagnostics. // Since all the headers are marked as system headers in the modulemap, we can simply disable this // pushing and popping when building with clang modules. # if !__has_feature(modules) # define _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS \ _LIBCPP_DIAGNOSTIC_PUSH \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++11-extensions") \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++14-extensions") \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++17-extensions") \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wc++20-extensions") \ _LIBCPP_CLANG_DIAGNOSTIC_IGNORED_CXX23_EXTENSION \ _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++14-extensions") \ _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++17-extensions") \ _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++20-extensions") \ _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wc++23-extensions") # define _LIBCPP_POP_EXTENSION_DIAGNOSTICS _LIBCPP_DIAGNOSTIC_POP # else # define _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS # define _LIBCPP_POP_EXTENSION_DIAGNOSTICS # endif // Inline namespaces are available in Clang/GCC/MSVC regardless of C++ dialect. 
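// Expansion sketch for the namespace macros defined below (assuming the
// default ABI namespace, _LIBCPP_ABI_NAMESPACE, is __1): a header written as
//
//   _LIBCPP_BEGIN_NAMESPACE_STD
//   template <class _Tp> struct __example_trait {};
//   _LIBCPP_END_NAMESPACE_STD
//
// is roughly equivalent to
//
//   namespace std { inline namespace __1 {
//   template <class _Tp> struct __example_trait {};
//   }}
//
// so user code names std::__example_trait while the mangled names stay inside
// the versioned inline namespace.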
// clang-format off # define _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_PUSH_EXTENSION_DIAGNOSTICS \ namespace _LIBCPP_TYPE_VISIBILITY_DEFAULT std { \ inline namespace _LIBCPP_ABI_NAMESPACE { # define _LIBCPP_END_NAMESPACE_STD }} _LIBCPP_POP_EXTENSION_DIAGNOSTICS #ifdef _LIBCPP_ABI_NO_FILESYSTEM_INLINE_NAMESPACE # define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_STD namespace filesystem { # define _LIBCPP_END_NAMESPACE_FILESYSTEM } _LIBCPP_END_NAMESPACE_STD #else # define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_STD \ inline namespace __fs { namespace filesystem { # define _LIBCPP_END_NAMESPACE_FILESYSTEM }} _LIBCPP_END_NAMESPACE_STD #endif // clang-format on # if __has_attribute(__enable_if__) # define _LIBCPP_PREFERRED_OVERLOAD __attribute__((__enable_if__(true, ""))) # endif # if !defined(__SIZEOF_INT128__) || defined(_MSC_VER) # define _LIBCPP_HAS_NO_INT128 # endif # ifdef _LIBCPP_CXX03_LANG # define _LIBCPP_DECLARE_STRONG_ENUM(x) \ struct _LIBCPP_EXPORTED_FROM_ABI x { \ enum __lx // clang-format off # define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) \ __lx __v_; \ _LIBCPP_HIDE_FROM_ABI x(__lx __v) : __v_(__v) {} \ _LIBCPP_HIDE_FROM_ABI explicit x(int __v) : __v_(static_cast<__lx>(__v)) {} \ _LIBCPP_HIDE_FROM_ABI operator int() const { return __v_; } \ }; // clang-format on # else // _LIBCPP_CXX03_LANG # define _LIBCPP_DECLARE_STRONG_ENUM(x) enum class x # define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) # endif // _LIBCPP_CXX03_LANG # if defined(__APPLE__) || defined(__FreeBSD__) || defined(_LIBCPP_MSVCRT_LIKE) || defined(__NetBSD__) # define _LIBCPP_LOCALE__L_EXTENSIONS 1 # endif # ifdef __FreeBSD__ # define _DECLARE_C99_LDBL_MATH 1 # endif // If we are getting operator new from the MSVC CRT, then allocation overloads // for align_val_t were added in 19.12, aka VS 2017 version 15.3. # if defined(_LIBCPP_MSVCRT) && defined(_MSC_VER) && _MSC_VER < 1912 # define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION # elif defined(_LIBCPP_ABI_VCRUNTIME) && !defined(__cpp_aligned_new) // We're deferring to Microsoft's STL to provide aligned new et al. We don't // have it unless the language feature test macro is defined. # define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION # elif defined(__MVS__) # define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION # endif # if defined(_LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION) || (!defined(__cpp_aligned_new) || __cpp_aligned_new < 201606) # define _LIBCPP_HAS_NO_ALIGNED_ALLOCATION # endif // It is not yet possible to use aligned_alloc() on all Apple platforms since // 10.15 was the first version to ship an implementation of aligned_alloc(). # if defined(__APPLE__) # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) # define _LIBCPP_HAS_NO_C11_ALIGNED_ALLOC # endif # elif defined(__ANDROID__) && __ANDROID_API__ < 28 // Android only provides aligned_alloc when targeting API 28 or higher. # define _LIBCPP_HAS_NO_C11_ALIGNED_ALLOC # endif # if defined(__APPLE__) || defined(__FreeBSD__) # define _LIBCPP_HAS_DEFAULTRUNELOCALE # endif # if defined(__APPLE__) || defined(__FreeBSD__) # define _LIBCPP_WCTYPE_IS_MASK # endif # if _LIBCPP_STD_VER <= 17 || !defined(__cpp_char8_t) # define _LIBCPP_HAS_NO_CHAR8_T # endif // Deprecation macros. // // Deprecations warnings are always enabled, except when users explicitly opt-out // by defining _LIBCPP_DISABLE_DEPRECATION_WARNINGS. 
# if !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) # if __has_attribute(__deprecated__) # define _LIBCPP_DEPRECATED __attribute__((__deprecated__)) # define _LIBCPP_DEPRECATED_(m) __attribute__((__deprecated__(m))) # elif _LIBCPP_STD_VER >= 14 # define _LIBCPP_DEPRECATED [[deprecated]] # define _LIBCPP_DEPRECATED_(m) [[deprecated(m)]] # else # define _LIBCPP_DEPRECATED # define _LIBCPP_DEPRECATED_(m) # endif # else # define _LIBCPP_DEPRECATED # define _LIBCPP_DEPRECATED_(m) # endif # if _LIBCPP_STD_VER < 20 # define _LIBCPP_DEPRECATED_ATOMIC_SYNC \ _LIBCPP_DEPRECATED_("The C++20 synchronization library has been deprecated prior to C++20. Please update to " \ "using -std=c++20 if you need to use these facilities.") # else # define _LIBCPP_DEPRECATED_ATOMIC_SYNC /* nothing */ # endif # if !defined(_LIBCPP_CXX03_LANG) # define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX11 # endif # if _LIBCPP_STD_VER >= 14 # define _LIBCPP_DEPRECATED_IN_CXX14 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX14 # endif # if _LIBCPP_STD_VER >= 17 # define _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX17 # endif # if _LIBCPP_STD_VER >= 20 # define _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX20 # endif # if _LIBCPP_STD_VER >= 23 # define _LIBCPP_DEPRECATED_IN_CXX23 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX23 # endif # if _LIBCPP_STD_VER >= 26 # define _LIBCPP_DEPRECATED_IN_CXX26 _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_IN_CXX26 # endif # if !defined(_LIBCPP_HAS_NO_CHAR8_T) # define _LIBCPP_DEPRECATED_WITH_CHAR8_T _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_WITH_CHAR8_T # endif // Macros to enter and leave a state where deprecation warnings are suppressed. 
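// Usage sketch for the annotations defined above (hypothetical declaration):
//
//   _LIBCPP_DEPRECATED_IN_CXX17 inline int __example_old_api(int __x) { return __x; }
//
// expands to [[deprecated]] (or the equivalent GCC/Clang attribute) in C++17
// and later, and to nothing in older modes; the suppression macros defined
// below are used around the library's own internal references to such
// entities.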
# if defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_SUPPRESS_DEPRECATED_PUSH \ _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wdeprecated\"") \ _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") # define _LIBCPP_SUPPRESS_DEPRECATED_POP _Pragma("GCC diagnostic pop") # else # define _LIBCPP_SUPPRESS_DEPRECATED_PUSH # define _LIBCPP_SUPPRESS_DEPRECATED_POP # endif # if _LIBCPP_STD_VER <= 11 # define _LIBCPP_EXPLICIT_SINCE_CXX14 # else # define _LIBCPP_EXPLICIT_SINCE_CXX14 explicit # endif # if _LIBCPP_STD_VER >= 23 # define _LIBCPP_EXPLICIT_SINCE_CXX23 explicit # else # define _LIBCPP_EXPLICIT_SINCE_CXX23 # endif # if _LIBCPP_STD_VER >= 14 # define _LIBCPP_CONSTEXPR_SINCE_CXX14 constexpr # else # define _LIBCPP_CONSTEXPR_SINCE_CXX14 # endif # if _LIBCPP_STD_VER >= 17 # define _LIBCPP_CONSTEXPR_SINCE_CXX17 constexpr # else # define _LIBCPP_CONSTEXPR_SINCE_CXX17 # endif # if _LIBCPP_STD_VER >= 20 # define _LIBCPP_CONSTEXPR_SINCE_CXX20 constexpr # else # define _LIBCPP_CONSTEXPR_SINCE_CXX20 # endif # if _LIBCPP_STD_VER >= 23 # define _LIBCPP_CONSTEXPR_SINCE_CXX23 constexpr # else # define _LIBCPP_CONSTEXPR_SINCE_CXX23 # endif # ifndef _LIBCPP_WEAK # define _LIBCPP_WEAK __attribute__((__weak__)) # endif // Thread API // clang-format off # if !defined(_LIBCPP_HAS_NO_THREADS) && \ !defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && \ !defined(_LIBCPP_HAS_THREAD_API_WIN32) && \ !defined(_LIBCPP_HAS_THREAD_API_EXTERNAL) # if defined(__FreeBSD__) || \ defined(__wasi__) || \ defined(__NetBSD__) || \ defined(__OpenBSD__) || \ defined(__NuttX__) || \ defined(__linux__) || \ defined(__GNU__) || \ defined(__APPLE__) || \ defined(__MVS__) || \ defined(_AIX) || \ defined(__EMSCRIPTEN__) // clang-format on # define _LIBCPP_HAS_THREAD_API_PTHREAD # elif defined(__Fuchsia__) // TODO(44575): Switch to C11 thread API when possible. # define _LIBCPP_HAS_THREAD_API_PTHREAD # elif defined(_LIBCPP_WIN32API) # define _LIBCPP_HAS_THREAD_API_WIN32 # else # error "No thread API" # endif // _LIBCPP_HAS_THREAD_API # endif // _LIBCPP_HAS_NO_THREADS # if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) # if defined(__ANDROID__) && __ANDROID_API__ >= 30 # define _LIBCPP_HAS_COND_CLOCKWAIT # elif defined(_LIBCPP_GLIBC_PREREQ) # if _LIBCPP_GLIBC_PREREQ(2, 30) # define _LIBCPP_HAS_COND_CLOCKWAIT # endif # endif # endif # if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD) # error _LIBCPP_HAS_THREAD_API_PTHREAD may only be defined when \ _LIBCPP_HAS_NO_THREADS is not defined. # endif # if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_EXTERNAL) # error _LIBCPP_HAS_THREAD_API_EXTERNAL may not be defined when \ _LIBCPP_HAS_NO_THREADS is defined. # endif # if defined(_LIBCPP_HAS_NO_MONOTONIC_CLOCK) && !defined(_LIBCPP_HAS_NO_THREADS) # error _LIBCPP_HAS_NO_MONOTONIC_CLOCK may only be defined when \ _LIBCPP_HAS_NO_THREADS is defined. # endif # if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(__STDCPP_THREADS__) # define __STDCPP_THREADS__ 1 # endif // The glibc and Bionic implementation of pthreads implements // pthread_mutex_destroy as nop for regular mutexes. Additionally, Win32 // mutexes have no destroy mechanism. // // This optimization can't be performed on Apple platforms, where // pthread_mutex_destroy can allow the kernel to release resources. // See https://llvm.org/D64298 for details. // // TODO(EricWF): Enable this optimization on Bionic after speaking to their // respective stakeholders. 
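// Effect sketch (hedged, based on the rationale above): when
// _LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION is defined by the block below,
// std::mutex can omit the call to the platform's destroy function in its
// destructor, so mutexes with static storage duration need no teardown at
// program exit.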
// clang-format off # if (defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && defined(__GLIBC__)) || \ (defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || \ defined(_LIBCPP_HAS_THREAD_API_WIN32) // clang-format on # define _LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION # endif // Destroying a condvar is a nop on Windows. // // This optimization can't be performed on Apple platforms, where // pthread_cond_destroy can allow the kernel to release resources. // See https://llvm.org/D64298 for details. // // TODO(EricWF): This is potentially true for some pthread implementations // as well. # if (defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || defined(_LIBCPP_HAS_THREAD_API_WIN32) # define _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION # endif # if defined(__BIONIC__) || defined(__NuttX__) || defined(__Fuchsia__) || defined(__wasi__) || \ defined(_LIBCPP_HAS_MUSL_LIBC) || defined(__OpenBSD__) # define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE # endif # if __has_feature(cxx_atomic) || __has_extension(c_atomic) || __has_keyword(_Atomic) # define _LIBCPP_HAS_C_ATOMIC_IMP # elif defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_HAS_GCC_ATOMIC_IMP # endif # if !defined(_LIBCPP_HAS_C_ATOMIC_IMP) && !defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) && \ !defined(_LIBCPP_HAS_EXTERNAL_ATOMIC_IMP) # define _LIBCPP_HAS_NO_ATOMIC_HEADER # else # ifndef _LIBCPP_ATOMIC_FLAG_TYPE # define _LIBCPP_ATOMIC_FLAG_TYPE bool # endif # endif # if defined(__FreeBSD__) && defined(__clang__) && __has_attribute(__no_thread_safety_analysis__) # define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS __attribute__((__no_thread_safety_analysis__)) # else # define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS # endif # if defined(_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS) # if defined(__clang__) && __has_attribute(acquire_capability) // Work around the attribute handling in clang. When both __declspec and // __attribute__ are present, the processing goes awry preventing the definition // of the types. In MinGW mode, __declspec evaluates to __attribute__, and thus // combining the two does work. # if !defined(_MSC_VER) # define _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS # endif # endif # endif # ifdef _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS # define _LIBCPP_THREAD_SAFETY_ANNOTATION(x) __attribute__((x)) # else # define _LIBCPP_THREAD_SAFETY_ANNOTATION(x) # endif # if _LIBCPP_STD_VER >= 20 # define _LIBCPP_CONSTINIT constinit # elif __has_attribute(__require_constant_initialization__) # define _LIBCPP_CONSTINIT __attribute__((__require_constant_initialization__)) # else # define _LIBCPP_CONSTINIT # endif # if defined(__CUDACC__) || defined(__CUDA_ARCH__) || defined(__CUDA_LIBDEVICE__) // The CUDA SDK contains an unfortunate definition for the __noinline__ macro, // which breaks the regular __attribute__((__noinline__)) syntax. Therefore, // when compiling for CUDA we use the non-underscored version of the noinline // attribute. // // This is a temporary workaround and we still expect the CUDA SDK team to solve // this issue properly in the SDK headers. // // See https://github.com/llvm/llvm-project/pull/73838 for more details. # define _LIBCPP_NOINLINE __attribute__((noinline)) # elif __has_attribute(__noinline__) # define _LIBCPP_NOINLINE __attribute__((__noinline__)) # else # define _LIBCPP_NOINLINE # endif // We often repeat things just for handling wide characters in the library. // When wide characters are disabled, it can be useful to have a quick way of // disabling it without having to resort to #if-#endif, which has a larger // impact on readability. 
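// Usage sketch for the macro defined below (hypothetical explicit
// instantiations):
//
//   template class __example_buf<char>;
//   _LIBCPP_IF_WIDE_CHARACTERS(template class __example_buf<wchar_t>;)
//
// keeps the wide-character variant on configurations that support wchar_t and
// drops it entirely otherwise, without an explicit #if/#endif block.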
# if defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) # define _LIBCPP_IF_WIDE_CHARACTERS(...) # else # define _LIBCPP_IF_WIDE_CHARACTERS(...) __VA_ARGS__ # endif // clang-format off # define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")") _Pragma("push_macro(\"refresh\")") _Pragma("push_macro(\"move\")") _Pragma("push_macro(\"erase\")") # define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")") _Pragma("pop_macro(\"refresh\")") _Pragma("pop_macro(\"move\")") _Pragma("pop_macro(\"erase\")") // clang-format on # ifndef _LIBCPP_NO_AUTO_LINK # if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_BUILDING_LIBRARY) # if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) # pragma comment(lib, "c++.lib") # else # pragma comment(lib, "libc++.lib") # endif # endif // defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_BUILDING_LIBRARY) # endif // _LIBCPP_NO_AUTO_LINK // Configures the fopen close-on-exec mode character, if any. This string will // be appended to any mode string used by fstream for fopen/fdopen. // // Not all platforms support this, but it helps avoid fd-leaks on platforms that // do. # if defined(__BIONIC__) # define _LIBCPP_FOPEN_CLOEXEC_MODE "e" # else # define _LIBCPP_FOPEN_CLOEXEC_MODE # endif # if __has_cpp_attribute(msvc::no_unique_address) // MSVC implements [[no_unique_address]] as a silent no-op currently. // (If/when MSVC breaks its C++ ABI, it will be changed to work as intended.) // However, MSVC implements [[msvc::no_unique_address]] which does what // [[no_unique_address]] is supposed to do, in general. // Clang-cl does not yet (14.0) implement either [[no_unique_address]] or // [[msvc::no_unique_address]] though. If/when it does implement // [[msvc::no_unique_address]], this should be preferred though. # define _LIBCPP_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] # elif __has_cpp_attribute(no_unique_address) # define _LIBCPP_NO_UNIQUE_ADDRESS [[__no_unique_address__]] # else # define _LIBCPP_NO_UNIQUE_ADDRESS /* nothing */ // Note that this can be replaced by #error as soon as clang-cl // implements msvc::no_unique_address, since there should be no C++20 // compiler that doesn't support one of the two attributes at that point. // We generally don't want to use this macro outside of C++20-only code, // because using it conditionally in one language version only would make // the ABI inconsistent. # endif // c8rtomb() and mbrtoc8() were added in C++20 and C23. Support for these // functions is gradually being added to existing C libraries. The conditions // below check for known C library versions and conditions under which these // functions are declared by the C library. # define _LIBCPP_HAS_NO_C8RTOMB_MBRTOC8 // GNU libc 2.36 and newer declare c8rtomb() and mbrtoc8() in C++ modes if // __cpp_char8_t is defined or if C2X extensions are enabled. Determining // the latter depends on internal GNU libc details that are not appropriate // to depend on here, so any declarations present when __cpp_char8_t is not // defined are ignored. # if defined(_LIBCPP_GLIBC_PREREQ) # if _LIBCPP_GLIBC_PREREQ(2, 36) && defined(__cpp_char8_t) # undef _LIBCPP_HAS_NO_C8RTOMB_MBRTOC8 # endif # endif // There are a handful of public standard library types that are intended to // support CTAD but don't need any explicit deduction guides to do so. This // macro is used to mark them as such, which suppresses the // '-Wctad-maybe-unsupported' compiler warning when CTAD is used in user code // with these classes. 
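// Usage sketch for the macro defined below (hypothetical class template):
//
//   _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(__example_view);
//
// marks __example_view as intentionally CTAD-friendly and silences
// -Wctad-maybe-unsupported for it without introducing real deduction guides.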
# if _LIBCPP_STD_VER >= 17 # ifdef _LIBCPP_COMPILER_CLANG_BASED # define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(_ClassName) \ template \ [[maybe_unused]] _ClassName(typename _Tag::__allow_ctad...)->_ClassName<_Tag...> # else # define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(ClassName) \ template \ ClassName(typename _Tag::__allow_ctad...)->ClassName<_Tag...> # endif # else # define _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(_ClassName) static_assert(true, "") # endif // TODO(varconst): currently, there are bugs in Clang's intrinsics when handling Objective-C++ `id`, so don't use // compiler intrinsics in the Objective-C++ mode. # ifdef __OBJC__ # define _LIBCPP_WORKAROUND_OBJCXX_COMPILER_INTRINSICS # endif # define _PSTL_PRAGMA(x) _Pragma(#x) // Enable SIMD for compilers that support OpenMP 4.0 # if (defined(_OPENMP) && _OPENMP >= 201307) # define _PSTL_UDR_PRESENT # define _PSTL_PRAGMA_SIMD _PSTL_PRAGMA(omp simd) # define _PSTL_PRAGMA_DECLARE_SIMD _PSTL_PRAGMA(omp declare simd) # define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _PSTL_PRAGMA(omp simd reduction(PRM)) # define _PSTL_PRAGMA_SIMD_SCAN(PRM) _PSTL_PRAGMA(omp simd reduction(inscan, PRM)) # define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan inclusive(PRM)) # define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan exclusive(PRM)) // Declaration of reduction functor, where // NAME - the name of the functor // OP - type of the callable object with the reduction operation // omp_in - refers to the local partial result // omp_out - refers to the final value of the combiner operator // omp_priv - refers to the private copy of the initial value // omp_orig - refers to the original variable to be reduced # define _PSTL_PRAGMA_DECLARE_REDUCTION(NAME, OP) \ _PSTL_PRAGMA(omp declare reduction(NAME:OP : omp_out(omp_in)) initializer(omp_priv = omp_orig)) # elif defined(_LIBCPP_COMPILER_CLANG_BASED) # define _PSTL_PRAGMA_SIMD _Pragma("clang loop vectorize(enable) interleave(enable)") # define _PSTL_PRAGMA_DECLARE_SIMD # define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _Pragma("clang loop vectorize(enable) interleave(enable)") # define _PSTL_PRAGMA_SIMD_SCAN(PRM) _Pragma("clang loop vectorize(enable) interleave(enable)") # define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) # define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) # define _PSTL_PRAGMA_DECLARE_REDUCTION(NAME, OP) # else // (defined(_OPENMP) && _OPENMP >= 201307) # define _PSTL_PRAGMA_SIMD # define _PSTL_PRAGMA_DECLARE_SIMD # define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) # define _PSTL_PRAGMA_SIMD_SCAN(PRM) # define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) # define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) # define _PSTL_PRAGMA_DECLARE_REDUCTION(NAME, OP) # endif // (defined(_OPENMP) && _OPENMP >= 201307) # define _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED // Optional attributes - these are useful for a better QoI, but not required to be available # if __has_attribute(__no_sanitize__) && !defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_NO_CFI __attribute__((__no_sanitize__("cfi"))) # else # define _LIBCPP_NO_CFI # endif # if __has_attribute(__malloc__) # define _LIBCPP_NOALIAS __attribute__((__malloc__)) # else # define _LIBCPP_NOALIAS # endif # if __has_attribute(__using_if_exists__) # define _LIBCPP_USING_IF_EXISTS __attribute__((__using_if_exists__)) # else # define _LIBCPP_USING_IF_EXISTS # endif # if __has_cpp_attribute(__nodiscard__) # define _LIBCPP_NODISCARD [[__nodiscard__]] # else // We can't use GCC's [[gnu::warn_unused_result]] and // __attribute__((warn_unused_result)), because GCC does not silence them via // (void) 
cast. # define _LIBCPP_NODISCARD # endif # if __has_attribute(__no_destroy__) # define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__)) # else # define _LIBCPP_NO_DESTROY # endif # if __has_attribute(__diagnose_if__) # define _LIBCPP_DIAGNOSE_WARNING(...) __attribute__((__diagnose_if__(__VA_ARGS__, "warning"))) # else # define _LIBCPP_DIAGNOSE_WARNING(...) # endif // Use a function like macro to imply that it must be followed by a semicolon # if __has_cpp_attribute(fallthrough) # define _LIBCPP_FALLTHROUGH() [[fallthrough]] # elif __has_attribute(__fallthrough__) # define _LIBCPP_FALLTHROUGH() __attribute__((__fallthrough__)) # else # define _LIBCPP_FALLTHROUGH() ((void)0) # endif # if __has_cpp_attribute(_Clang::__lifetimebound__) # define _LIBCPP_LIFETIMEBOUND [[_Clang::__lifetimebound__]] # else # define _LIBCPP_LIFETIMEBOUND # endif # if __has_attribute(__nodebug__) # define _LIBCPP_NODEBUG __attribute__((__nodebug__)) # else # define _LIBCPP_NODEBUG # endif # if __has_attribute(__standalone_debug__) # define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__)) # else # define _LIBCPP_STANDALONE_DEBUG # endif # if __has_attribute(__preferred_name__) # define _LIBCPP_PREFERRED_NAME(x) __attribute__((__preferred_name__(x))) # else # define _LIBCPP_PREFERRED_NAME(x) # endif # if __has_attribute(__no_sanitize__) # define _LIBCPP_NO_SANITIZE(...) __attribute__((__no_sanitize__(__VA_ARGS__))) # else # define _LIBCPP_NO_SANITIZE(...) # endif # if __has_attribute(__init_priority__) # define _LIBCPP_INIT_PRIORITY_MAX __attribute__((__init_priority__(100))) # else # define _LIBCPP_INIT_PRIORITY_MAX # endif # if __has_attribute(__format__) // The attribute uses 1-based indices for ordinary and static member functions. // The attribute uses 2-based indices for non-static member functions. # define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) \ __attribute__((__format__(archetype, format_string_index, first_format_arg_index))) # else # define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) /* nothing */ # endif # if __has_attribute(__packed__) # define _LIBCPP_PACKED __attribute__((__packed__)) # else # define _LIBCPP_PACKED # endif # if defined(_LIBCPP_ABI_MICROSOFT) && __has_declspec_attribute(empty_bases) # define _LIBCPP_DECLSPEC_EMPTY_BASES __declspec(empty_bases) # else # define _LIBCPP_DECLSPEC_EMPTY_BASES # endif // Allow for build-time disabling of unsigned integer sanitization # if __has_attribute(no_sanitize) && !defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __attribute__((__no_sanitize__("unsigned-integer-overflow"))) # else # define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK # endif // Clang-18 has support for deducing this, but it does not set the FTM. # if defined(__cpp_explicit_this_parameter) || (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1800) # define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER # endif #endif // __cplusplus #endif // _LIBCPP___CONFIG diff --git a/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h b/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h index ebd92f264d90..d2126a03db90 100644 --- a/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h +++ b/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h @@ -1,1212 +1,1235 @@ //===-- llvm/TargetParser/Triple.h - Target triple helper class--*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
// See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_TARGETPARSER_TRIPLE_H #define LLVM_TARGETPARSER_TRIPLE_H #include "llvm/ADT/Twine.h" #include "llvm/Support/VersionTuple.h" // Some system headers or GCC predefined macros conflict with identifiers in // this file. Undefine them here. #undef NetBSD #undef mips #undef sparc namespace llvm { /// Triple - Helper class for working with autoconf configuration names. For /// historical reasons, we also call these 'triples' (they used to contain /// exactly three fields). /// /// Configuration names are strings in the canonical form: /// ARCHITECTURE-VENDOR-OPERATING_SYSTEM /// or /// ARCHITECTURE-VENDOR-OPERATING_SYSTEM-ENVIRONMENT /// /// This class is used for clients which want to support arbitrary /// configuration names, but also want to implement certain special /// behavior for particular configurations. This class isolates the mapping /// from the components of the configuration name to well known IDs. /// /// At its core the Triple class is designed to be a wrapper for a triple /// string; the constructor does not change or normalize the triple string. /// Clients that need to handle the non-canonical triples that users often /// specify should use the normalize method. /// /// See autoconf/config.guess for a glimpse into what configuration names /// look like in practice. class Triple { public: enum ArchType { UnknownArch, arm, // ARM (little endian): arm, armv.*, xscale armeb, // ARM (big endian): armeb aarch64, // AArch64 (little endian): aarch64 aarch64_be, // AArch64 (big endian): aarch64_be aarch64_32, // AArch64 (little endian) ILP32: aarch64_32 arc, // ARC: Synopsys ARC avr, // AVR: Atmel AVR microcontroller bpfel, // eBPF or extended BPF or 64-bit BPF (little endian) bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian) csky, // CSKY: csky dxil, // DXIL 32-bit DirectX bytecode hexagon, // Hexagon: hexagon loongarch32, // LoongArch (32-bit): loongarch32 loongarch64, // LoongArch (64-bit): loongarch64 m68k, // M68k: Motorola 680x0 family mips, // MIPS: mips, mipsallegrex, mipsr6 mipsel, // MIPSEL: mipsel, mipsallegrexe, mipsr6el mips64, // MIPS64: mips64, mips64r6, mipsn32, mipsn32r6 mips64el, // MIPS64EL: mips64el, mips64r6el, mipsn32el, mipsn32r6el msp430, // MSP430: msp430 ppc, // PPC: powerpc ppcle, // PPCLE: powerpc (little endian) ppc64, // PPC64: powerpc64, ppu ppc64le, // PPC64LE: powerpc64le r600, // R600: AMD GPUs HD2XXX - HD6XXX amdgcn, // AMDGCN: AMD GCN GPUs riscv32, // RISC-V (32-bit): riscv32 riscv64, // RISC-V (64-bit): riscv64 sparc, // Sparc: sparc sparcv9, // Sparcv9: Sparcv9 sparcel, // Sparc: (endianness = little). 
NB: 'Sparcle' is a CPU variant systemz, // SystemZ: s390x tce, // TCE (http://tce.cs.tut.fi/): tce tcele, // TCE little endian (http://tce.cs.tut.fi/): tcele thumb, // Thumb (little endian): thumb, thumbv.* thumbeb, // Thumb (big endian): thumbeb x86, // X86: i[3-9]86 x86_64, // X86-64: amd64, x86_64 xcore, // XCore: xcore xtensa, // Tensilica: Xtensa nvptx, // NVPTX: 32-bit nvptx64, // NVPTX: 64-bit le32, // le32: generic little-endian 32-bit CPU (PNaCl) le64, // le64: generic little-endian 64-bit CPU (PNaCl) amdil, // AMDIL amdil64, // AMDIL with 64-bit pointers hsail, // AMD HSAIL hsail64, // AMD HSAIL with 64-bit pointers spir, // SPIR: standard portable IR for OpenCL 32-bit version spir64, // SPIR: standard portable IR for OpenCL 64-bit version spirv, // SPIR-V with logical memory layout. spirv32, // SPIR-V with 32-bit pointers spirv64, // SPIR-V with 64-bit pointers kalimba, // Kalimba: generic kalimba shave, // SHAVE: Movidius vector VLIW processors lanai, // Lanai: Lanai 32-bit wasm32, // WebAssembly with 32-bit pointers wasm64, // WebAssembly with 64-bit pointers renderscript32, // 32-bit RenderScript renderscript64, // 64-bit RenderScript ve, // NEC SX-Aurora Vector Engine LastArchType = ve }; enum SubArchType { NoSubArch, ARMSubArch_v9_5a, ARMSubArch_v9_4a, ARMSubArch_v9_3a, ARMSubArch_v9_2a, ARMSubArch_v9_1a, ARMSubArch_v9, ARMSubArch_v8_9a, ARMSubArch_v8_8a, ARMSubArch_v8_7a, ARMSubArch_v8_6a, ARMSubArch_v8_5a, ARMSubArch_v8_4a, ARMSubArch_v8_3a, ARMSubArch_v8_2a, ARMSubArch_v8_1a, ARMSubArch_v8, ARMSubArch_v8r, ARMSubArch_v8m_baseline, ARMSubArch_v8m_mainline, ARMSubArch_v8_1m_mainline, ARMSubArch_v7, ARMSubArch_v7em, ARMSubArch_v7m, ARMSubArch_v7s, ARMSubArch_v7k, ARMSubArch_v7ve, ARMSubArch_v6, ARMSubArch_v6m, ARMSubArch_v6k, ARMSubArch_v6t2, ARMSubArch_v5, ARMSubArch_v5te, ARMSubArch_v4t, AArch64SubArch_arm64e, AArch64SubArch_arm64ec, KalimbaSubArch_v3, KalimbaSubArch_v4, KalimbaSubArch_v5, MipsSubArch_r6, PPCSubArch_spe, // SPIR-V sub-arch corresponds to its version. SPIRVSubArch_v10, SPIRVSubArch_v11, SPIRVSubArch_v12, SPIRVSubArch_v13, SPIRVSubArch_v14, SPIRVSubArch_v15, SPIRVSubArch_v16, // DXIL sub-arch corresponds to its version. 
DXILSubArch_v1_0, DXILSubArch_v1_1, DXILSubArch_v1_2, DXILSubArch_v1_3, DXILSubArch_v1_4, DXILSubArch_v1_5, DXILSubArch_v1_6, DXILSubArch_v1_7, DXILSubArch_v1_8, LatestDXILSubArch = DXILSubArch_v1_8, }; enum VendorType { UnknownVendor, Apple, PC, SCEI, Freescale, IBM, ImaginationTechnologies, MipsTechnologies, NVIDIA, CSR, AMD, Mesa, SUSE, OpenEmbedded, LastVendorType = OpenEmbedded }; enum OSType { UnknownOS, Darwin, DragonFly, FreeBSD, Fuchsia, IOS, KFreeBSD, Linux, Lv2, // PS3 MacOSX, NetBSD, OpenBSD, Solaris, UEFI, Win32, ZOS, Haiku, RTEMS, NaCl, // Native Client AIX, CUDA, // NVIDIA CUDA NVCL, // NVIDIA OpenCL AMDHSA, // AMD HSA Runtime PS4, PS5, ELFIAMCU, TvOS, // Apple tvOS WatchOS, // Apple watchOS BridgeOS, // Apple bridgeOS DriverKit, // Apple DriverKit XROS, // Apple XROS Mesa3D, AMDPAL, // AMD PAL Runtime HermitCore, // HermitCore Unikernel/Multikernel Hurd, // GNU/Hurd WASI, // Experimental WebAssembly OS Emscripten, ShaderModel, // DirectX ShaderModel LiteOS, Serenity, Vulkan, // Vulkan SPIR-V LastOSType = Vulkan }; enum EnvironmentType { UnknownEnvironment, GNU, GNUABIN32, GNUABI64, GNUEABI, GNUEABIHF, GNUF32, GNUF64, GNUSF, GNUX32, GNUILP32, CODE16, EABI, EABIHF, Android, Musl, MuslEABI, MuslEABIHF, MuslX32, MSVC, Itanium, Cygnus, CoreCLR, Simulator, // Simulator variants of other systems, e.g., Apple's iOS MacABI, // Mac Catalyst variant of Apple's iOS deployment target. // Shader Stages // The order of these values matters, and must be kept in sync with the // language options enum in Clang. The ordering is enforced in // static_asserts in Triple.cpp and in Clang. Pixel, Vertex, Geometry, Hull, Domain, Compute, Library, RayGeneration, Intersection, AnyHit, ClosestHit, Miss, Callable, Mesh, Amplification, OpenCL, OpenHOS, PAuthTest, - LastEnvironmentType = PAuthTest + GNUT64, + GNUEABIT64, + GNUEABIHFT64, + + LastEnvironmentType = GNUEABIHFT64 }; enum ObjectFormatType { UnknownObjectFormat, COFF, DXContainer, ELF, GOFF, MachO, SPIRV, Wasm, XCOFF, }; private: std::string Data; /// The parsed arch type. ArchType Arch{}; /// The parsed subarchitecture type. SubArchType SubArch{}; /// The parsed vendor type. VendorType Vendor{}; /// The parsed OS type. OSType OS{}; /// The parsed Environment type. EnvironmentType Environment{}; /// The object format type. ObjectFormatType ObjectFormat{}; public: /// @name Constructors /// @{ /// Default constructor is the same as an empty string and leaves all /// triple fields unknown. Triple() = default; explicit Triple(const Twine &Str); Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr); Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr, const Twine &EnvironmentStr); bool operator==(const Triple &Other) const { return Arch == Other.Arch && SubArch == Other.SubArch && Vendor == Other.Vendor && OS == Other.OS && Environment == Other.Environment && ObjectFormat == Other.ObjectFormat; } bool operator!=(const Triple &Other) const { return !(*this == Other); } /// @} /// @name Normalization /// @{ /// Turn an arbitrary machine specification into the canonical triple form (or /// something sensible that the Triple class understands if nothing better can /// reasonably be done). In particular, it handles the common case in which /// otherwise valid components are in the wrong order. static std::string normalize(StringRef Str); /// Return the normalized form of this triple's string. 
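/// Normalization sketch (hedged; the exact result is determined by the parsing
/// tables in Triple.cpp): a loosely specified string such as "x86_64-linux-gnu"
/// is expected to be rewritten into the canonical ARCH-VENDOR-OS-ENV form,
/// "x86_64-unknown-linux-gnu", with missing components filled in as "unknown".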
std::string normalize() const { return normalize(Data); } /// @} /// @name Typed Component Access /// @{ /// Get the parsed architecture type of this triple. ArchType getArch() const { return Arch; } /// get the parsed subarchitecture type for this triple. SubArchType getSubArch() const { return SubArch; } /// Get the parsed vendor type of this triple. VendorType getVendor() const { return Vendor; } /// Get the parsed operating system type of this triple. OSType getOS() const { return OS; } /// Does this triple have the optional environment (fourth) component? bool hasEnvironment() const { return getEnvironmentName() != ""; } /// Get the parsed environment type of this triple. EnvironmentType getEnvironment() const { return Environment; } /// Parse the version number from the OS name component of the /// triple, if present. /// /// For example, "fooos1.2.3" would return (1, 2, 3). VersionTuple getEnvironmentVersion() const; /// Get the object format for this triple. ObjectFormatType getObjectFormat() const { return ObjectFormat; } /// Parse the version number from the OS name component of the triple, if /// present. /// /// For example, "fooos1.2.3" would return (1, 2, 3). VersionTuple getOSVersion() const; /// Return just the major version number, this is specialized because it is a /// common query. unsigned getOSMajorVersion() const { return getOSVersion().getMajor(); } /// Parse the version number as with getOSVersion and then translate generic /// "darwin" versions to the corresponding OS X versions. This may also be /// called with IOS triples but the OS X version number is just set to a /// constant 10.4.0 in that case. Returns true if successful. bool getMacOSXVersion(VersionTuple &Version) const; /// Parse the version number as with getOSVersion. This should only be called /// with IOS or generic triples. VersionTuple getiOSVersion() const; /// Parse the version number as with getOSVersion. This should only be called /// with WatchOS or generic triples. VersionTuple getWatchOSVersion() const; /// Parse the version number as with getOSVersion. VersionTuple getDriverKitVersion() const; /// Parse the Vulkan version number from the OSVersion and SPIR-V version /// (SubArch). This should only be called with Vulkan SPIR-V triples. VersionTuple getVulkanVersion() const; /// Parse the DXIL version number from the OSVersion and DXIL version /// (SubArch). This should only be called with DXIL triples. VersionTuple getDXILVersion() const; /// @} /// @name Direct Component Access /// @{ const std::string &str() const { return Data; } const std::string &getTriple() const { return Data; } /// Get the architecture (first) component of the triple. StringRef getArchName() const; /// Get the vendor (second) component of the triple. StringRef getVendorName() const; /// Get the operating system (third) component of the triple. StringRef getOSName() const; /// Get the optional environment (fourth) component of the triple, or "" if /// empty. StringRef getEnvironmentName() const; /// Get the operating system and optional environment components as a single /// string (separated by a '-' if the environment component is present). StringRef getOSAndEnvironmentName() const; /// Get the version component of the environment component as a single /// string (the version after the environment). /// /// For example, "fooos1.2.3" would return "1.2.3". StringRef getEnvironmentVersionString() const; /// @} /// @name Convenience Predicates /// @{ /// Returns the pointer width of this architecture. 
static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch); /// Returns the pointer width of this architecture. unsigned getArchPointerBitWidth() const { return getArchPointerBitWidth(getArch()); } /// Test whether the architecture is 64-bit /// /// Note that this tests for 64-bit pointer width, and nothing else. Note /// that we intentionally expose only three predicates, 64-bit, 32-bit, and /// 16-bit. The inner details of pointer width for particular architectures /// is not summed up in the triple, and so only a coarse grained predicate /// system is provided. bool isArch64Bit() const; /// Test whether the architecture is 32-bit /// /// Note that this tests for 32-bit pointer width, and nothing else. bool isArch32Bit() const; /// Test whether the architecture is 16-bit /// /// Note that this tests for 16-bit pointer width, and nothing else. bool isArch16Bit() const; /// Helper function for doing comparisons against version numbers included in /// the target triple. bool isOSVersionLT(unsigned Major, unsigned Minor = 0, unsigned Micro = 0) const { if (Minor == 0) { return getOSVersion() < VersionTuple(Major); } if (Micro == 0) { return getOSVersion() < VersionTuple(Major, Minor); } return getOSVersion() < VersionTuple(Major, Minor, Micro); } bool isOSVersionLT(const Triple &Other) const { return getOSVersion() < Other.getOSVersion(); } /// Comparison function for checking OS X version compatibility, which handles /// supporting skewed version numbering schemes used by the "darwin" triples. bool isMacOSXVersionLT(unsigned Major, unsigned Minor = 0, unsigned Micro = 0) const; /// Is this a Mac OS X triple. For legacy reasons, we support both "darwin" /// and "osx" as OS X triples. bool isMacOSX() const { return getOS() == Triple::Darwin || getOS() == Triple::MacOSX; } /// Is this an iOS triple. /// Note: This identifies tvOS as a variant of iOS. If that ever /// changes, i.e., if the two operating systems diverge or their version /// numbers get out of sync, that will need to be changed. /// watchOS has completely different version numbers so it is not included. bool isiOS() const { return getOS() == Triple::IOS || isTvOS(); } /// Is this an Apple tvOS triple. bool isTvOS() const { return getOS() == Triple::TvOS; } /// Is this an Apple watchOS triple. bool isWatchOS() const { return getOS() == Triple::WatchOS; } bool isWatchABI() const { return getSubArch() == Triple::ARMSubArch_v7k; } /// Is this an Apple XROS triple. bool isXROS() const { return getOS() == Triple::XROS; } /// Is this an Apple DriverKit triple. bool isDriverKit() const { return getOS() == Triple::DriverKit; } bool isOSzOS() const { return getOS() == Triple::ZOS; } /// Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit). bool isOSDarwin() const { return isMacOSX() || isiOS() || isWatchOS() || isDriverKit() || isXROS(); } bool isSimulatorEnvironment() const { return getEnvironment() == Triple::Simulator; } bool isMacCatalystEnvironment() const { return getEnvironment() == Triple::MacABI; } /// Returns true for targets that run on a macOS machine. 
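// Illustrative sketch, not part of this diff: the coarse pointer-width
// predicates and the OS-version comparison helpers declared above.
#include "llvm/TargetParser/Triple.h"
#include <cassert>

static void widthAndDarwinExample() {
  assert(llvm::Triple("aarch64-unknown-linux-gnu").isArch64Bit());
  assert(llvm::Triple("armv7-unknown-linux-gnueabihf").isArch32Bit());

  llvm::Triple Mac("x86_64-apple-macosx10.15");
  assert(Mac.isMacOSX() && Mac.isOSDarwin());
  assert(Mac.isOSVersionLT(11)); // 10.15 < 11.0
}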
bool isTargetMachineMac() const { return isMacOSX() || (isOSDarwin() && (isSimulatorEnvironment() || isMacCatalystEnvironment())); } bool isOSNetBSD() const { return getOS() == Triple::NetBSD; } bool isOSOpenBSD() const { return getOS() == Triple::OpenBSD; } bool isOSFreeBSD() const { return getOS() == Triple::FreeBSD; } bool isOSFuchsia() const { return getOS() == Triple::Fuchsia; } bool isOSDragonFly() const { return getOS() == Triple::DragonFly; } bool isOSSolaris() const { return getOS() == Triple::Solaris; } bool isOSIAMCU() const { return getOS() == Triple::ELFIAMCU; } bool isOSUnknown() const { return getOS() == Triple::UnknownOS; } bool isGNUEnvironment() const { EnvironmentType Env = getEnvironment(); - return Env == Triple::GNU || Env == Triple::GNUABIN32 || - Env == Triple::GNUABI64 || Env == Triple::GNUEABI || - Env == Triple::GNUEABIHF || Env == Triple::GNUF32 || - Env == Triple::GNUF64 || Env == Triple::GNUSF || - Env == Triple::GNUX32; + return Env == Triple::GNU || Env == Triple::GNUT64 || + Env == Triple::GNUABIN32 || Env == Triple::GNUABI64 || + Env == Triple::GNUEABI || Env == Triple::GNUEABIT64 || + Env == Triple::GNUEABIHF || Env == Triple::GNUEABIHFT64 || + Env == Triple::GNUF32 || Env == Triple::GNUF64 || + Env == Triple::GNUSF || Env == Triple::GNUX32; } /// Tests whether the OS is Haiku. bool isOSHaiku() const { return getOS() == Triple::Haiku; } /// Tests whether the OS is UEFI. bool isUEFI() const { return getOS() == Triple::UEFI; } /// Tests whether the OS is Windows. bool isOSWindows() const { return getOS() == Triple::Win32; } /// Checks if the environment is MSVC. bool isKnownWindowsMSVCEnvironment() const { return isOSWindows() && getEnvironment() == Triple::MSVC; } /// Checks if the environment could be MSVC. bool isWindowsMSVCEnvironment() const { return isKnownWindowsMSVCEnvironment() || (isOSWindows() && getEnvironment() == Triple::UnknownEnvironment); } // Checks if we're using the Windows Arm64EC ABI. bool isWindowsArm64EC() const { return getArch() == Triple::aarch64 && getSubArch() == Triple::AArch64SubArch_arm64ec; } bool isWindowsCoreCLREnvironment() const { return isOSWindows() && getEnvironment() == Triple::CoreCLR; } bool isWindowsItaniumEnvironment() const { return isOSWindows() && getEnvironment() == Triple::Itanium; } bool isWindowsCygwinEnvironment() const { return isOSWindows() && getEnvironment() == Triple::Cygnus; } bool isWindowsGNUEnvironment() const { return isOSWindows() && getEnvironment() == Triple::GNU; } /// Tests for either Cygwin or MinGW OS bool isOSCygMing() const { return isWindowsCygwinEnvironment() || isWindowsGNUEnvironment(); } /// Is this a "Windows" OS targeting a "MSVCRT.dll" environment. bool isOSMSVCRT() const { return isWindowsMSVCEnvironment() || isWindowsGNUEnvironment() || isWindowsItaniumEnvironment(); } /// Tests whether the OS is NaCl (Native Client) bool isOSNaCl() const { return getOS() == Triple::NaCl; } /// Tests whether the OS is Linux. bool isOSLinux() const { return getOS() == Triple::Linux; } /// Tests whether the OS is kFreeBSD. bool isOSKFreeBSD() const { return getOS() == Triple::KFreeBSD; } /// Tests whether the OS is Hurd. bool isOSHurd() const { return getOS() == Triple::Hurd; } /// Tests whether the OS is WASI. bool isOSWASI() const { return getOS() == Triple::WASI; } /// Tests whether the OS is Emscripten. bool isOSEmscripten() const { return getOS() == Triple::Emscripten; } /// Tests whether the OS uses glibc. 
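// Illustrative sketch, not part of this diff: with the hunk above, the new
// *T64 environments are still classified as GNU environments. The example
// switches the environment via setEnvironment() so it does not have to
// assume the textual spelling of the new environment names.
#include "llvm/TargetParser/Triple.h"
#include <cassert>

static void gnuEnvExample() {
  llvm::Triple T("armv7-unknown-linux-gnueabihf");
  assert(T.isGNUEnvironment());
  T.setEnvironment(llvm::Triple::GNUEABIHFT64);
  assert(T.isGNUEnvironment());         // still true after this change
  assert(!T.isWindowsGNUEnvironment()); // OS is Linux, not Win32
}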
bool isOSGlibc() const { return (getOS() == Triple::Linux || getOS() == Triple::KFreeBSD || getOS() == Triple::Hurd) && !isAndroid(); } /// Tests whether the OS is AIX. bool isOSAIX() const { return getOS() == Triple::AIX; } bool isOSSerenity() const { return getOS() == Triple::Serenity; } /// Tests whether the OS uses the ELF binary format. bool isOSBinFormatELF() const { return getObjectFormat() == Triple::ELF; } /// Tests whether the OS uses the COFF binary format. bool isOSBinFormatCOFF() const { return getObjectFormat() == Triple::COFF; } /// Tests whether the OS uses the GOFF binary format. bool isOSBinFormatGOFF() const { return getObjectFormat() == Triple::GOFF; } /// Tests whether the environment is MachO. bool isOSBinFormatMachO() const { return getObjectFormat() == Triple::MachO; } /// Tests whether the OS uses the Wasm binary format. bool isOSBinFormatWasm() const { return getObjectFormat() == Triple::Wasm; } /// Tests whether the OS uses the XCOFF binary format. bool isOSBinFormatXCOFF() const { return getObjectFormat() == Triple::XCOFF; } /// Tests whether the OS uses the DXContainer binary format. bool isOSBinFormatDXContainer() const { return getObjectFormat() == Triple::DXContainer; } /// Tests whether the target is the PS4 platform. bool isPS4() const { return getArch() == Triple::x86_64 && getVendor() == Triple::SCEI && getOS() == Triple::PS4; } /// Tests whether the target is the PS5 platform. bool isPS5() const { return getArch() == Triple::x86_64 && getVendor() == Triple::SCEI && getOS() == Triple::PS5; } /// Tests whether the target is the PS4 or PS5 platform. bool isPS() const { return isPS4() || isPS5(); } /// Tests whether the target is Android bool isAndroid() const { return getEnvironment() == Triple::Android; } bool isAndroidVersionLT(unsigned Major) const { assert(isAndroid() && "Not an Android triple!"); VersionTuple Version = getEnvironmentVersion(); // 64-bit targets did not exist before API level 21 (Lollipop). if (isArch64Bit() && Version.getMajor() < 21) return VersionTuple(21) < VersionTuple(Major); return Version < VersionTuple(Major); } /// Tests whether the environment is musl-libc bool isMusl() const { return getEnvironment() == Triple::Musl || getEnvironment() == Triple::MuslEABI || getEnvironment() == Triple::MuslEABIHF || getEnvironment() == Triple::MuslX32 || getEnvironment() == Triple::OpenHOS || isOSLiteOS(); } /// Tests whether the target is OHOS /// LiteOS default enviroment is also OHOS, but omited on triple. bool isOHOSFamily() const { return isOpenHOS() || isOSLiteOS(); } bool isOpenHOS() const { return getEnvironment() == Triple::OpenHOS; } bool isOSLiteOS() const { return getOS() == Triple::LiteOS; } /// Tests whether the target is DXIL. bool isDXIL() const { return getArch() == Triple::dxil; } bool isShaderModelOS() const { return getOS() == Triple::ShaderModel; } bool isVulkanOS() const { return getOS() == Triple::Vulkan; } bool isShaderStageEnvironment() const { EnvironmentType Env = getEnvironment(); return Env == Triple::Pixel || Env == Triple::Vertex || Env == Triple::Geometry || Env == Triple::Hull || Env == Triple::Domain || Env == Triple::Compute || Env == Triple::Library || Env == Triple::RayGeneration || Env == Triple::Intersection || Env == Triple::AnyHit || Env == Triple::ClosestHit || Env == Triple::Miss || Env == Triple::Callable || Env == Triple::Mesh || Env == Triple::Amplification; } /// Tests whether the target is SPIR (32- or 64-bit). 
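// Illustrative sketch, not part of this diff: the libc and object-format
// predicates declared above.
#include "llvm/TargetParser/Triple.h"
#include <cassert>

static void libcExample() {
  llvm::Triple Glibc("x86_64-unknown-linux-gnu");
  assert(Glibc.isOSGlibc() && Glibc.isOSBinFormatELF());

  // Android runs on Linux but is explicitly excluded from isOSGlibc().
  assert(!llvm::Triple("aarch64-linux-android").isOSGlibc());

  assert(llvm::Triple("x86_64-unknown-linux-musl").isMusl());
}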
bool isSPIR() const { return getArch() == Triple::spir || getArch() == Triple::spir64; } /// Tests whether the target is SPIR-V (32/64-bit/Logical). bool isSPIRV() const { return getArch() == Triple::spirv32 || getArch() == Triple::spirv64 || getArch() == Triple::spirv; } /// Tests whether the target is SPIR-V Logical bool isSPIRVLogical() const { return getArch() == Triple::spirv; } /// Tests whether the target is NVPTX (32- or 64-bit). bool isNVPTX() const { return getArch() == Triple::nvptx || getArch() == Triple::nvptx64; } /// Tests whether the target is AMDGCN bool isAMDGCN() const { return getArch() == Triple::amdgcn; } bool isAMDGPU() const { return getArch() == Triple::r600 || getArch() == Triple::amdgcn; } /// Tests whether the target is Thumb (little and big endian). bool isThumb() const { return getArch() == Triple::thumb || getArch() == Triple::thumbeb; } /// Tests whether the target is ARM (little and big endian). bool isARM() const { return getArch() == Triple::arm || getArch() == Triple::armeb; } /// Tests whether the target supports the EHABI exception /// handling standard. bool isTargetEHABICompatible() const { return (isARM() || isThumb()) && (getEnvironment() == Triple::EABI || getEnvironment() == Triple::GNUEABI || + getEnvironment() == Triple::GNUEABIT64 || getEnvironment() == Triple::MuslEABI || getEnvironment() == Triple::EABIHF || getEnvironment() == Triple::GNUEABIHF || + getEnvironment() == Triple::GNUEABIHFT64 || getEnvironment() == Triple::OpenHOS || getEnvironment() == Triple::MuslEABIHF || isAndroid()) && isOSBinFormatELF(); } /// Tests whether the target is T32. bool isArmT32() const { switch (getSubArch()) { case Triple::ARMSubArch_v8m_baseline: case Triple::ARMSubArch_v7s: case Triple::ARMSubArch_v7k: case Triple::ARMSubArch_v7ve: case Triple::ARMSubArch_v6: case Triple::ARMSubArch_v6m: case Triple::ARMSubArch_v6k: case Triple::ARMSubArch_v6t2: case Triple::ARMSubArch_v5: case Triple::ARMSubArch_v5te: case Triple::ARMSubArch_v4t: return false; default: return true; } } /// Tests whether the target is an M-class. bool isArmMClass() const { switch (getSubArch()) { case Triple::ARMSubArch_v6m: case Triple::ARMSubArch_v7m: case Triple::ARMSubArch_v7em: case Triple::ARMSubArch_v8m_mainline: case Triple::ARMSubArch_v8m_baseline: case Triple::ARMSubArch_v8_1m_mainline: return true; default: return false; } } /// Tests whether the target is AArch64 (little and big endian). bool isAArch64() const { return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be || getArch() == Triple::aarch64_32; } /// Tests whether the target is AArch64 and pointers are the size specified by /// \p PointerWidth. bool isAArch64(int PointerWidth) const { assert(PointerWidth == 64 || PointerWidth == 32); if (!isAArch64()) return false; return getArch() == Triple::aarch64_32 || getEnvironment() == Triple::GNUILP32 ? PointerWidth == 32 : PointerWidth == 64; } /// Tests whether the target is 32-bit LoongArch. bool isLoongArch32() const { return getArch() == Triple::loongarch32; } /// Tests whether the target is 64-bit LoongArch. bool isLoongArch64() const { return getArch() == Triple::loongarch64; } /// Tests whether the target is LoongArch (32- and 64-bit). bool isLoongArch() const { return isLoongArch32() || isLoongArch64(); } /// Tests whether the target is MIPS 32-bit (little and big endian). bool isMIPS32() const { return getArch() == Triple::mips || getArch() == Triple::mipsel; } /// Tests whether the target is MIPS 64-bit (little and big endian). 
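// Illustrative sketch, not part of this diff: the EHABI predicate above now
// also accepts the time64 EABI environments added in this change.
#include "llvm/TargetParser/Triple.h"
#include <cassert>

static void ehabiExample() {
  llvm::Triple T("armv7-unknown-linux-gnueabi");
  assert(T.isARM() && T.isOSBinFormatELF());
  assert(T.isTargetEHABICompatible());
  T.setEnvironment(llvm::Triple::GNUEABIT64);
  assert(T.isTargetEHABICompatible()); // still EHABI-compatible with *T64
}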
bool isMIPS64() const { return getArch() == Triple::mips64 || getArch() == Triple::mips64el; } /// Tests whether the target is MIPS (little and big endian, 32- or 64-bit). bool isMIPS() const { return isMIPS32() || isMIPS64(); } /// Tests whether the target is PowerPC (32- or 64-bit LE or BE). bool isPPC() const { return getArch() == Triple::ppc || getArch() == Triple::ppc64 || getArch() == Triple::ppcle || getArch() == Triple::ppc64le; } /// Tests whether the target is 32-bit PowerPC (little and big endian). bool isPPC32() const { return getArch() == Triple::ppc || getArch() == Triple::ppcle; } /// Tests whether the target is 64-bit PowerPC (little and big endian). bool isPPC64() const { return getArch() == Triple::ppc64 || getArch() == Triple::ppc64le; } /// Tests whether the target 64-bit PowerPC big endian ABI is ELFv2. bool isPPC64ELFv2ABI() const { return (getArch() == Triple::ppc64 && ((getOS() == Triple::FreeBSD && (getOSMajorVersion() >= 13 || getOSVersion().empty())) || getOS() == Triple::OpenBSD || isMusl())); } /// Tests whether the target 32-bit PowerPC uses Secure PLT. bool isPPC32SecurePlt() const { return ((getArch() == Triple::ppc || getArch() == Triple::ppcle) && ((getOS() == Triple::FreeBSD && (getOSMajorVersion() >= 13 || getOSVersion().empty())) || getOS() == Triple::NetBSD || getOS() == Triple::OpenBSD || isMusl())); } /// Tests whether the target is 32-bit RISC-V. bool isRISCV32() const { return getArch() == Triple::riscv32; } /// Tests whether the target is 64-bit RISC-V. bool isRISCV64() const { return getArch() == Triple::riscv64; } /// Tests whether the target is RISC-V (32- and 64-bit). bool isRISCV() const { return isRISCV32() || isRISCV64(); } /// Tests whether the target is 32-bit SPARC (little and big endian). bool isSPARC32() const { return getArch() == Triple::sparc || getArch() == Triple::sparcel; } /// Tests whether the target is 64-bit SPARC (big endian). bool isSPARC64() const { return getArch() == Triple::sparcv9; } /// Tests whether the target is SPARC. bool isSPARC() const { return isSPARC32() || isSPARC64(); } /// Tests whether the target is SystemZ. bool isSystemZ() const { return getArch() == Triple::systemz; } /// Tests whether the target is x86 (32- or 64-bit). bool isX86() const { return getArch() == Triple::x86 || getArch() == Triple::x86_64; } /// Tests whether the target is VE bool isVE() const { return getArch() == Triple::ve; } /// Tests whether the target is wasm (32- and 64-bit). bool isWasm() const { return getArch() == Triple::wasm32 || getArch() == Triple::wasm64; } // Tests whether the target is CSKY bool isCSKY() const { return getArch() == Triple::csky; } /// Tests whether the target is the Apple "arm64e" AArch64 subarch. bool isArm64e() const { return getArch() == Triple::aarch64 && getSubArch() == Triple::AArch64SubArch_arm64e; } /// Tests whether the target is X32. bool isX32() const { EnvironmentType Env = getEnvironment(); return Env == Triple::GNUX32 || Env == Triple::MuslX32; } /// Tests whether the target is eBPF. bool isBPF() const { return getArch() == Triple::bpfel || getArch() == Triple::bpfeb; } + /// Tests if the target forces 64-bit time_t on a 32-bit architecture. + bool isTime64ABI() const { + EnvironmentType Env = getEnvironment(); + return Env == Triple::GNUT64 || Env == Triple::GNUEABIT64 || + Env == Triple::GNUEABIHFT64; + } + + /// Tests if the target forces hardfloat. 
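// Illustrative sketch, not part of this diff: how the new isTime64ABI() hook
// is expected to be queried by Triple users (for example, driver code that
// must pick glibc's 64-bit time_t ABI on a 32-bit target).
#include "llvm/TargetParser/Triple.h"
#include <cassert>

static void time64Example() {
  llvm::Triple T("armv7-unknown-linux-gnueabihf");
  assert(!T.isTime64ABI());                     // plain hard-float EABI
  T.setEnvironment(llvm::Triple::GNUEABIHFT64);
  assert(T.isTime64ABI());                      // forced 64-bit time_t
}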
+ bool isHardFloatABI() const { + EnvironmentType Env = getEnvironment(); + return Env == llvm::Triple::GNUEABIHF || + Env == llvm::Triple::GNUEABIHFT64 || + Env == llvm::Triple::MuslEABIHF || + Env == llvm::Triple::EABIHF; + } + /// Tests whether the target supports comdat bool supportsCOMDAT() const { return !(isOSBinFormatMachO() || isOSBinFormatXCOFF() || isOSBinFormatDXContainer()); } /// Tests whether the target uses emulated TLS as default. /// /// Note: Android API level 29 (10) introduced ELF TLS. bool hasDefaultEmulatedTLS() const { return (isAndroid() && isAndroidVersionLT(29)) || isOSOpenBSD() || isWindowsCygwinEnvironment() || isOHOSFamily(); } /// True if the target supports both general-dynamic and TLSDESC, and TLSDESC /// is enabled by default. bool hasDefaultTLSDESC() const { return isAndroid() && isRISCV64(); } /// Tests whether the target uses -data-sections as default. bool hasDefaultDataSections() const { return isOSBinFormatXCOFF() || isWasm(); } /// Tests if the environment supports dllimport/export annotations. bool hasDLLImportExport() const { return isOSWindows() || isPS(); } /// @} /// @name Mutators /// @{ /// Set the architecture (first) component of the triple to a known type. void setArch(ArchType Kind, SubArchType SubArch = NoSubArch); /// Set the vendor (second) component of the triple to a known type. void setVendor(VendorType Kind); /// Set the operating system (third) component of the triple to a known type. void setOS(OSType Kind); /// Set the environment (fourth) component of the triple to a known type. void setEnvironment(EnvironmentType Kind); /// Set the object file format. void setObjectFormat(ObjectFormatType Kind); /// Set all components to the new triple \p Str. void setTriple(const Twine &Str); /// Set the architecture (first) component of the triple by name. void setArchName(StringRef Str); /// Set the vendor (second) component of the triple by name. void setVendorName(StringRef Str); /// Set the operating system (third) component of the triple by name. void setOSName(StringRef Str); /// Set the optional environment (fourth) component of the triple by name. void setEnvironmentName(StringRef Str); /// Set the operating system and optional environment components with a single /// string. void setOSAndEnvironmentName(StringRef Str); /// @} /// @name Helpers to build variants of a particular triple. /// @{ /// Form a triple with a 32-bit variant of the current architecture. /// /// This can be used to move across "families" of architectures where useful. /// /// \returns A new triple with a 32-bit architecture or an unknown /// architecture if no such variant can be found. llvm::Triple get32BitArchVariant() const; /// Form a triple with a 64-bit variant of the current architecture. /// /// This can be used to move across "families" of architectures where useful. /// /// \returns A new triple with a 64-bit architecture or an unknown /// architecture if no such variant can be found. llvm::Triple get64BitArchVariant() const; /// Form a triple with a big endian variant of the current architecture. /// /// This can be used to move across "families" of architectures where useful. /// /// \returns A new triple with a big endian architecture or an unknown /// architecture if no such variant can be found. llvm::Triple getBigEndianArchVariant() const; /// Form a triple with a little endian variant of the current architecture. /// /// This can be used to move across "families" of architectures where useful. 
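// Illustrative sketch, not part of this diff: the new isHardFloatABI()
// predicate groups the hard-float EABI environments, including GNUEABIHFT64,
// and composes with isTime64ABI() from the previous hunk.
#include "llvm/TargetParser/Triple.h"
#include <cassert>

static void hardFloatExample() {
  assert(llvm::Triple("armv7-unknown-linux-gnueabihf").isHardFloatABI());
  assert(!llvm::Triple("armv7-unknown-linux-gnueabi").isHardFloatABI());

  llvm::Triple T("armv7-unknown-linux-gnueabi");
  T.setEnvironment(llvm::Triple::GNUEABIHFT64);
  assert(T.isHardFloatABI() && T.isTime64ABI());
}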
/// /// \returns A new triple with a little endian architecture or an unknown /// architecture if no such variant can be found. llvm::Triple getLittleEndianArchVariant() const; /// Tests whether the target triple is little endian. /// /// \returns true if the triple is little endian, false otherwise. bool isLittleEndian() const; /// Test whether target triples are compatible. bool isCompatibleWith(const Triple &Other) const; /// Merge target triples. std::string merge(const Triple &Other) const; /// Some platforms have different minimum supported OS versions that /// varies by the architecture specified in the triple. This function /// returns the minimum supported OS version for this triple if one an exists, /// or an invalid version tuple if this triple doesn't have one. VersionTuple getMinimumSupportedOSVersion() const; /// @} /// @name Static helpers for IDs. /// @{ /// Get the canonical name for the \p Kind architecture. static StringRef getArchTypeName(ArchType Kind); /// Get the architecture name based on \p Kind and \p SubArch. static StringRef getArchName(ArchType Kind, SubArchType SubArch = NoSubArch); /// Get the "prefix" canonical name for the \p Kind architecture. This is the /// prefix used by the architecture specific builtins, and is suitable for /// passing to \see Intrinsic::getIntrinsicForClangBuiltin(). /// /// \return - The architecture prefix, or 0 if none is defined. static StringRef getArchTypePrefix(ArchType Kind); /// Get the canonical name for the \p Kind vendor. static StringRef getVendorTypeName(VendorType Kind); /// Get the canonical name for the \p Kind operating system. static StringRef getOSTypeName(OSType Kind); /// Get the canonical name for the \p Kind environment. static StringRef getEnvironmentTypeName(EnvironmentType Kind); /// Get the name for the \p Object format. static StringRef getObjectFormatTypeName(ObjectFormatType ObjectFormat); /// @} /// @name Static helpers for converting alternate architecture names. /// @{ /// The canonical type for the given LLVM architecture name (e.g., "x86"). static ArchType getArchTypeForLLVMName(StringRef Str); /// @} /// Returns a canonicalized OS version number for the specified OS. static VersionTuple getCanonicalVersionForOS(OSType OSKind, const VersionTuple &Version); }; } // End llvm namespace #endif diff --git a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp index df75745645e0..ff30fece5fce 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/ConstantFolding.cpp @@ -1,3663 +1,3663 @@ //===-- ConstantFolding.cpp - Fold instructions into constants ------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines routines for folding instructions into constants. // // Also, to supplement the basic IR ConstantExpr simplifications, // this file defines some additional folding routines that can make use of // DataLayout information. These functions cannot go in IR due to library // dependency issues. 
// //===----------------------------------------------------------------------===// #include "llvm/Analysis/ConstantFolding.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/Config/config.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantFold.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include #include #include #include #include using namespace llvm; namespace { //===----------------------------------------------------------------------===// // Constant Folding internal helper functions //===----------------------------------------------------------------------===// static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy, Constant *C, Type *SrcEltTy, unsigned NumSrcElts, const DataLayout &DL) { // Now that we know that the input value is a vector of integers, just shift // and insert them into our result. unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy); for (unsigned i = 0; i != NumSrcElts; ++i) { Constant *Element; if (DL.isLittleEndian()) Element = C->getAggregateElement(NumSrcElts - i - 1); else Element = C->getAggregateElement(i); if (Element && isa(Element)) { Result <<= BitShift; continue; } auto *ElementCI = dyn_cast_or_null(Element); if (!ElementCI) return ConstantExpr::getBitCast(C, DestTy); Result <<= BitShift; Result |= ElementCI->getValue().zext(Result.getBitWidth()); } return nullptr; } /// Constant fold bitcast, symbolically evaluating it with DataLayout. /// This always returns a non-null constant, but it may be a /// ConstantExpr if unfoldable. Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { assert(CastInst::castIsValid(Instruction::BitCast, C, DestTy) && "Invalid constantexpr bitcast!"); // Catch the obvious splat cases. if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL)) return Res; if (auto *VTy = dyn_cast(C->getType())) { // Handle a vector->scalar integer/fp cast. if (isa(DestTy) || DestTy->isFloatingPointTy()) { unsigned NumSrcElts = cast(VTy)->getNumElements(); Type *SrcEltTy = VTy->getElementType(); // If the vector is a vector of floating point, convert it to vector of int // to simplify things. if (SrcEltTy->isFloatingPointTy()) { unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); auto *SrcIVTy = FixedVectorType::get( IntegerType::get(C->getContext(), FPWidth), NumSrcElts); // Ask IR to do the conversion now that #elts line up. 
C = ConstantExpr::getBitCast(C, SrcIVTy); } APInt Result(DL.getTypeSizeInBits(DestTy), 0); if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C, SrcEltTy, NumSrcElts, DL)) return CE; if (isa(DestTy)) return ConstantInt::get(DestTy, Result); APFloat FP(DestTy->getFltSemantics(), Result); return ConstantFP::get(DestTy->getContext(), FP); } } // The code below only handles casts to vectors currently. auto *DestVTy = dyn_cast(DestTy); if (!DestVTy) return ConstantExpr::getBitCast(C, DestTy); // If this is a scalar -> vector cast, convert the input into a <1 x scalar> // vector so the code below can handle it uniformly. if (isa(C) || isa(C)) { Constant *Ops = C; // don't take the address of C! return FoldBitCast(ConstantVector::get(Ops), DestTy, DL); } // If this is a bitcast from constant vector -> vector, fold it. if (!isa(C) && !isa(C)) return ConstantExpr::getBitCast(C, DestTy); // If the element types match, IR can fold it. unsigned NumDstElt = cast(DestVTy)->getNumElements(); unsigned NumSrcElt = cast(C->getType())->getNumElements(); if (NumDstElt == NumSrcElt) return ConstantExpr::getBitCast(C, DestTy); Type *SrcEltTy = cast(C->getType())->getElementType(); Type *DstEltTy = DestVTy->getElementType(); // Otherwise, we're changing the number of elements in a vector, which // requires endianness information to do the right thing. For example, // bitcast (<2 x i64> to <4 x i32>) // folds to (little endian): // <4 x i32> // and to (big endian): // <4 x i32> // First thing is first. We only want to think about integer here, so if // we have something in FP form, recast it as integer. if (DstEltTy->isFloatingPointTy()) { // Fold to an vector of integers with same size as our FP type. unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); auto *DestIVTy = FixedVectorType::get( IntegerType::get(C->getContext(), FPWidth), NumDstElt); // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, DL); // Finally, IR can handle this now that #elts line up. return ConstantExpr::getBitCast(C, DestTy); } // Okay, we know the destination is integer, if the input is FP, convert // it to integer first. if (SrcEltTy->isFloatingPointTy()) { unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); auto *SrcIVTy = FixedVectorType::get( IntegerType::get(C->getContext(), FPWidth), NumSrcElt); // Ask IR to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); // If IR wasn't able to fold it, bail out. if (!isa(C) && // FIXME: Remove ConstantVector. !isa(C)) return C; } // Now we know that the input and output vectors are both integer vectors // of the same size, and that their #elements is not the same. Do the // conversion here, which depends on whether the input or output has // more elements. bool isLittleEndian = DL.isLittleEndian(); SmallVector Result; if (NumDstElt < NumSrcElt) { // Handle: bitcast (<4 x i32> to <2 x i64>) Constant *Zero = Constant::getNullValue(DstEltTy); unsigned Ratio = NumSrcElt/NumDstElt; unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits(); unsigned SrcElt = 0; for (unsigned i = 0; i != NumDstElt; ++i) { // Build each element of the result. Constant *Elt = Zero; unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1); for (unsigned j = 0; j != Ratio; ++j) { Constant *Src = C->getAggregateElement(SrcElt++); if (Src && isa(Src)) Src = Constant::getNullValue( cast(C->getType())->getElementType()); else Src = dyn_cast_or_null(Src); if (!Src) // Reject constantexpr elements. 
return ConstantExpr::getBitCast(C, DestTy); // Zero extend the element to the right size. Src = ConstantFoldCastOperand(Instruction::ZExt, Src, Elt->getType(), DL); assert(Src && "Constant folding cannot fail on plain integers"); // Shift it to the right place, depending on endianness. Src = ConstantFoldBinaryOpOperands( Instruction::Shl, Src, ConstantInt::get(Src->getType(), ShiftAmt), DL); assert(Src && "Constant folding cannot fail on plain integers"); ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; // Mix it in. Elt = ConstantFoldBinaryOpOperands(Instruction::Or, Elt, Src, DL); assert(Elt && "Constant folding cannot fail on plain integers"); } Result.push_back(Elt); } return ConstantVector::get(Result); } // Handle: bitcast (<2 x i64> to <4 x i32>) unsigned Ratio = NumDstElt/NumSrcElt; unsigned DstBitSize = DL.getTypeSizeInBits(DstEltTy); // Loop over each source value, expanding into multiple results. for (unsigned i = 0; i != NumSrcElt; ++i) { auto *Element = C->getAggregateElement(i); if (!Element) // Reject constantexpr elements. return ConstantExpr::getBitCast(C, DestTy); if (isa(Element)) { // Correctly Propagate undef values. Result.append(Ratio, UndefValue::get(DstEltTy)); continue; } auto *Src = dyn_cast(Element); if (!Src) return ConstantExpr::getBitCast(C, DestTy); unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); for (unsigned j = 0; j != Ratio; ++j) { // Shift the piece of the value into the right place, depending on // endianness. APInt Elt = Src->getValue().lshr(ShiftAmt); ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; // Truncate and remember this piece. Result.push_back(ConstantInt::get(DstEltTy, Elt.trunc(DstBitSize))); } } return ConstantVector::get(Result); } } // end anonymous namespace /// If this constant is a constant offset from a global, return the global and /// the constant. Because of constantexprs, this function is recursive. bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, const DataLayout &DL, DSOLocalEquivalent **DSOEquiv) { if (DSOEquiv) *DSOEquiv = nullptr; // Trivial case, constant is the global. if ((GV = dyn_cast(C))) { unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType()); Offset = APInt(BitWidth, 0); return true; } if (auto *FoundDSOEquiv = dyn_cast(C)) { if (DSOEquiv) *DSOEquiv = FoundDSOEquiv; GV = FoundDSOEquiv->getGlobalValue(); unsigned BitWidth = DL.getIndexTypeSizeInBits(GV->getType()); Offset = APInt(BitWidth, 0); return true; } // Otherwise, if this isn't a constant expr, bail out. auto *CE = dyn_cast(C); if (!CE) return false; // Look through ptr->int and ptr->ptr casts. if (CE->getOpcode() == Instruction::PtrToInt || CE->getOpcode() == Instruction::BitCast) return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL, DSOEquiv); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) auto *GEP = dyn_cast(CE); if (!GEP) return false; unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); APInt TmpOffset(BitWidth, 0); // If the base isn't a global+constant, we aren't either. if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, DL, DSOEquiv)) return false; // Otherwise, add any offset that our operands provide. 
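// Illustrative worked example, not part of this diff: the loop above splits
// each wide source element into several narrow destination elements, low
// piece first on little-endian targets. The same byte math in plain C++,
// for bitcasting <2 x i64> to <4 x i32>:
#include <cassert>
#include <cstdint>

static void bitcastSplitExample() {
  const uint64_t Src[2] = {0x0011223344556677ULL, 0x8899aabbccddeeffULL};
  uint32_t Dst[4];
  for (int i = 0; i != 2; ++i) {
    Dst[2 * i] = static_cast<uint32_t>(Src[i]);         // low 32 bits first (LE)
    Dst[2 * i + 1] = static_cast<uint32_t>(Src[i] >> 32);
  }
  assert(Dst[0] == 0x44556677u && Dst[1] == 0x00112233u);
  assert(Dst[2] == 0xccddeeffu && Dst[3] == 0x8899aabbu);
}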
if (!GEP->accumulateConstantOffset(DL, TmpOffset)) return false; Offset = TmpOffset; return true; } Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, const DataLayout &DL) { do { Type *SrcTy = C->getType(); if (SrcTy == DestTy) return C; TypeSize DestSize = DL.getTypeSizeInBits(DestTy); TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy); if (!TypeSize::isKnownGE(SrcSize, DestSize)) return nullptr; // Catch the obvious splat cases (since all-zeros can coerce non-integral // pointers legally). if (Constant *Res = ConstantFoldLoadFromUniformValue(C, DestTy, DL)) return Res; // If the type sizes are the same and a cast is legal, just directly // cast the constant. // But be careful not to coerce non-integral pointers illegally. if (SrcSize == DestSize && DL.isNonIntegralPointerType(SrcTy->getScalarType()) == DL.isNonIntegralPointerType(DestTy->getScalarType())) { Instruction::CastOps Cast = Instruction::BitCast; // If we are going from a pointer to int or vice versa, we spell the cast // differently. if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) Cast = Instruction::IntToPtr; else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) Cast = Instruction::PtrToInt; if (CastInst::castIsValid(Cast, C, DestTy)) return ConstantFoldCastOperand(Cast, C, DestTy, DL); } // If this isn't an aggregate type, there is nothing we can do to drill down // and find a bitcastable constant. if (!SrcTy->isAggregateType() && !SrcTy->isVectorTy()) return nullptr; // We're simulating a load through a pointer that was bitcast to point to // a different type, so we can try to walk down through the initial // elements of an aggregate to see if some part of the aggregate is // castable to implement the "load" semantic model. if (SrcTy->isStructTy()) { // Struct types might have leading zero-length elements like [0 x i32], // which are certainly not what we are looking for, so skip them. unsigned Elem = 0; Constant *ElemC; do { ElemC = C->getAggregateElement(Elem++); } while (ElemC && DL.getTypeSizeInBits(ElemC->getType()).isZero()); C = ElemC; } else { // For non-byte-sized vector elements, the first element is not // necessarily located at the vector base address. if (auto *VT = dyn_cast(SrcTy)) if (!DL.typeSizeEqualsStoreSize(VT->getElementType())) return nullptr; C = C->getAggregateElement(0u); } } while (C); return nullptr; } namespace { /// Recursive helper to read bits out of global. C is the constant being copied /// out of. ByteOffset is an offset into C. CurPtr is the pointer to copy /// results into and BytesLeft is the number of bytes left in /// the CurPtr buffer. DL is the DataLayout. bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr, unsigned BytesLeft, const DataLayout &DL) { assert(ByteOffset <= DL.getTypeAllocSize(C->getType()) && "Out of range access"); // If this element is zero or undefined, we can just return since *CurPtr is // zero initialized. 
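// Illustrative sketch, not part of this diff: a hypothetical helper built on
// IsConstantOffsetFromGlobal(), whose declaration is assumed to live in
// llvm/Analysis/ConstantFolding.h.
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"

static bool isGlobalPlusOffset(llvm::Constant *C, const llvm::DataLayout &DL,
                               int64_t &ByteOffset) {
  llvm::GlobalValue *GV = nullptr;
  llvm::APInt Offset;
  if (!llvm::IsConstantOffsetFromGlobal(C, GV, Offset, DL,
                                        /*DSOEquiv=*/nullptr))
    return false;
  ByteOffset = Offset.getSExtValue();
  return true;
}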
if (isa(C) || isa(C)) return true; if (auto *CI = dyn_cast(C)) { if ((CI->getBitWidth() & 7) != 0) return false; const APInt &Val = CI->getValue(); unsigned IntBytes = unsigned(CI->getBitWidth()/8); for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { unsigned n = ByteOffset; if (!DL.isLittleEndian()) n = IntBytes - n - 1; CurPtr[i] = Val.extractBits(8, n * 8).getZExtValue(); ++ByteOffset; } return true; } if (auto *CFP = dyn_cast(C)) { if (CFP->getType()->isDoubleTy()) { C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), DL); return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL); } if (CFP->getType()->isFloatTy()){ C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), DL); return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL); } if (CFP->getType()->isHalfTy()){ C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), DL); return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, DL); } return false; } if (auto *CS = dyn_cast(C)) { const StructLayout *SL = DL.getStructLayout(CS->getType()); unsigned Index = SL->getElementContainingOffset(ByteOffset); uint64_t CurEltOffset = SL->getElementOffset(Index); ByteOffset -= CurEltOffset; while (true) { // If the element access is to the element itself and not to tail padding, // read the bytes from the element. uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType()); if (ByteOffset < EltSize && !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr, BytesLeft, DL)) return false; ++Index; // Check to see if we read from the last struct element, if so we're done. if (Index == CS->getType()->getNumElements()) return true; // If we read all of the bytes we needed from this element we're done. uint64_t NextEltOffset = SL->getElementOffset(Index); if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset) return true; // Move to the next element of the struct. CurPtr += NextEltOffset - CurEltOffset - ByteOffset; BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset; ByteOffset = 0; CurEltOffset = NextEltOffset; } // not reached. } if (isa(C) || isa(C) || isa(C)) { uint64_t NumElts, EltSize; Type *EltTy; if (auto *AT = dyn_cast(C->getType())) { NumElts = AT->getNumElements(); EltTy = AT->getElementType(); EltSize = DL.getTypeAllocSize(EltTy); } else { NumElts = cast(C->getType())->getNumElements(); EltTy = cast(C->getType())->getElementType(); // TODO: For non-byte-sized vectors, current implementation assumes there is // padding to the next byte boundary between elements. if (!DL.typeSizeEqualsStoreSize(EltTy)) return false; EltSize = DL.getTypeStoreSize(EltTy); } uint64_t Index = ByteOffset / EltSize; uint64_t Offset = ByteOffset - Index * EltSize; for (; Index != NumElts; ++Index) { if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, BytesLeft, DL)) return false; uint64_t BytesWritten = EltSize - Offset; assert(BytesWritten <= EltSize && "Not indexing into this element?"); if (BytesWritten >= BytesLeft) return true; Offset = 0; BytesLeft -= BytesWritten; CurPtr += BytesWritten; } return true; } if (auto *CE = dyn_cast(C)) { if (CE->getOpcode() == Instruction::IntToPtr && CE->getOperand(0)->getType() == DL.getIntPtrType(CE->getType())) { return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, BytesLeft, DL); } } // Otherwise, unknown initializer type. return false; } Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy, int64_t Offset, const DataLayout &DL) { // Bail out early. Not expect to load from scalable global variable. 
if (isa(LoadTy)) return nullptr; auto *IntType = dyn_cast(LoadTy); // If this isn't an integer load we can't fold it directly. if (!IntType) { // If this is a non-integer load, we can try folding it as an int load and // then bitcast the result. This can be useful for union cases. Note // that address spaces don't matter here since we're not going to result in // an actual new load. if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() && !LoadTy->isVectorTy()) return nullptr; Type *MapTy = Type::getIntNTy(C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedValue()); if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) { if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && !LoadTy->isX86_AMXTy()) // Materializing a zero can be done trivially without a bitcast return Constant::getNullValue(LoadTy); Type *CastTy = LoadTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(LoadTy) : LoadTy; Res = FoldBitCast(Res, CastTy, DL); if (LoadTy->isPtrOrPtrVectorTy()) { // For vector of pointer, we needed to first convert to a vector of integer, then do vector inttoptr if (Res->isNullValue() && !LoadTy->isX86_MMXTy() && !LoadTy->isX86_AMXTy()) return Constant::getNullValue(LoadTy); if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) // Be careful not to replace a load of an addrspace value with an inttoptr here return nullptr; Res = ConstantExpr::getIntToPtr(Res, LoadTy); } return Res; } return nullptr; } unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; if (BytesLoaded > 32 || BytesLoaded == 0) return nullptr; // If we're not accessing anything in this constant, the result is undefined. if (Offset <= -1 * static_cast(BytesLoaded)) return PoisonValue::get(IntType); // TODO: We should be able to support scalable types. TypeSize InitializerSize = DL.getTypeAllocSize(C->getType()); if (InitializerSize.isScalable()) return nullptr; // If we're not accessing anything in this constant, the result is undefined. if (Offset >= (int64_t)InitializerSize.getFixedValue()) return PoisonValue::get(IntType); unsigned char RawBytes[32] = {0}; unsigned char *CurPtr = RawBytes; unsigned BytesLeft = BytesLoaded; // If we're loading off the beginning of the global, some bytes may be valid. if (Offset < 0) { CurPtr += -Offset; BytesLeft += Offset; Offset = 0; } if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL)) return nullptr; APInt ResultVal = APInt(IntType->getBitWidth(), 0); if (DL.isLittleEndian()) { ResultVal = RawBytes[BytesLoaded - 1]; for (unsigned i = 1; i != BytesLoaded; ++i) { ResultVal <<= 8; ResultVal |= RawBytes[BytesLoaded - 1 - i]; } } else { ResultVal = RawBytes[0]; for (unsigned i = 1; i != BytesLoaded; ++i) { ResultVal <<= 8; ResultVal |= RawBytes[i]; } } return ConstantInt::get(IntType->getContext(), ResultVal); } } // anonymous namespace // If GV is a constant with an initializer read its representation starting // at Offset and return it as a constant array of unsigned char. Otherwise // return null. Constant *llvm::ReadByteArrayFromGlobal(const GlobalVariable *GV, uint64_t Offset) { if (!GV->isConstant() || !GV->hasDefinitiveInitializer()) return nullptr; const DataLayout &DL = GV->getDataLayout(); Constant *Init = const_cast(GV->getInitializer()); TypeSize InitSize = DL.getTypeAllocSize(Init->getType()); if (InitSize < Offset) return nullptr; uint64_t NBytes = InitSize - Offset; if (NBytes > UINT16_MAX) // Bail for large initializers in excess of 64K to avoid allocating // too much memory. 
// Offset is assumed to be less than or equal than InitSize (this // is enforced in ReadDataFromGlobal). return nullptr; SmallVector RawBytes(static_cast(NBytes)); unsigned char *CurPtr = RawBytes.data(); if (!ReadDataFromGlobal(Init, Offset, CurPtr, NBytes, DL)) return nullptr; return ConstantDataArray::get(GV->getContext(), RawBytes); } /// If this Offset points exactly to the start of an aggregate element, return /// that element, otherwise return nullptr. Constant *getConstantAtOffset(Constant *Base, APInt Offset, const DataLayout &DL) { if (Offset.isZero()) return Base; if (!isa(Base) && !isa(Base)) return nullptr; Type *ElemTy = Base->getType(); SmallVector Indices = DL.getGEPIndicesForOffset(ElemTy, Offset); if (!Offset.isZero() || !Indices[0].isZero()) return nullptr; Constant *C = Base; for (const APInt &Index : drop_begin(Indices)) { if (Index.isNegative() || Index.getActiveBits() >= 32) return nullptr; C = C->getAggregateElement(Index.getZExtValue()); if (!C) return nullptr; } return C; } Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL) { if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL)) if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL)) return Result; // Explicitly check for out-of-bounds access, so we return poison even if the // constant is a uniform value. TypeSize Size = DL.getTypeAllocSize(C->getType()); if (!Size.isScalable() && Offset.sge(Size.getFixedValue())) return PoisonValue::get(Ty); // Try an offset-independent fold of a uniform value. if (Constant *Result = ConstantFoldLoadFromUniformValue(C, Ty, DL)) return Result; // Try hard to fold loads from bitcasted strange and non-type-safe things. if (Offset.getSignificantBits() <= 64) if (Constant *Result = FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL)) return Result; return nullptr; } Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty, const DataLayout &DL) { return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL); } Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL) { // We can only fold loads from constant globals with a definitive initializer. // Check this upfront, to skip expensive offset calculations. auto *GV = dyn_cast(getUnderlyingObject(C)); if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer()) return nullptr; C = cast(C->stripAndAccumulateConstantOffsets( DL, Offset, /* AllowNonInbounds */ true)); if (C == GV) if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL)) return Result; // If this load comes from anywhere in a uniform constant global, the value // is always the same, regardless of the loaded offset. return ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty, DL); } Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout &DL) { APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0); return ConstantFoldLoadFromConstPtr(C, Ty, std::move(Offset), DL); } Constant *llvm::ConstantFoldLoadFromUniformValue(Constant *C, Type *Ty, const DataLayout &DL) { if (isa(C)) return PoisonValue::get(Ty); if (isa(C)) return UndefValue::get(Ty); // If padding is needed when storing C to memory, then it isn't considered as // uniform. 
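// Illustrative sketch, not part of this diff: typical use of the public
// load-folding entry point defined above. GV and LoadTy are hypothetical
// caller-provided values.
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"

static llvm::Constant *tryFoldLoad(llvm::GlobalVariable *GV,
                                   llvm::Type *LoadTy,
                                   const llvm::DataLayout &DL) {
  // Returns null unless GV is a constant global with a definitive initializer.
  return llvm::ConstantFoldLoadFromConstPtr(GV, LoadTy, DL);
}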
if (!DL.typeSizeEqualsStoreSize(C->getType())) return nullptr; if (C->isNullValue() && !Ty->isX86_MMXTy() && !Ty->isX86_AMXTy()) return Constant::getNullValue(Ty); if (C->isAllOnesValue() && (Ty->isIntOrIntVectorTy() || Ty->isFPOrFPVectorTy())) return Constant::getAllOnesValue(Ty); return nullptr; } namespace { /// One of Op0/Op1 is a constant expression. /// Attempt to symbolically evaluate the result of a binary operator merging /// these together. If target data info is available, it is provided as DL, /// otherwise DL is null. Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, const DataLayout &DL) { // SROA // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute // bits. if (Opc == Instruction::And) { KnownBits Known0 = computeKnownBits(Op0, DL); KnownBits Known1 = computeKnownBits(Op1, DL); if ((Known1.One | Known0.Zero).isAllOnes()) { // All the bits of Op0 that the 'and' could be masking are already zero. return Op0; } if ((Known0.One | Known1.Zero).isAllOnes()) { // All the bits of Op1 that the 'and' could be masking are already zero. return Op1; } Known0 &= Known1; if (Known0.isConstant()) return ConstantInt::get(Op0->getType(), Known0.getConstant()); } // If the constant expr is something like &A[123] - &A[4].f, fold this into a // constant. This happens frequently when iterating over a global array. if (Opc == Instruction::Sub) { GlobalValue *GV1, *GV2; APInt Offs1, Offs2; if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, DL)) if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, DL) && GV1 == GV2) { unsigned OpSize = DL.getTypeSizeInBits(Op0->getType()); // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. // PtrToInt may change the bitwidth so we have convert to the right size // first. return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) - Offs2.zextOrTrunc(OpSize)); } } return nullptr; } /// If array indices are not pointer-sized integers, explicitly cast them so /// that they aren't implicitly casted by the getelementptr. Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef Ops, Type *ResultTy, GEPNoWrapFlags NW, std::optional InRange, const DataLayout &DL, const TargetLibraryInfo *TLI) { Type *IntIdxTy = DL.getIndexType(ResultTy); Type *IntIdxScalarTy = IntIdxTy->getScalarType(); bool Any = false; SmallVector NewIdxs; for (unsigned i = 1, e = Ops.size(); i != e; ++i) { if ((i == 1 || !isa(GetElementPtrInst::getIndexedType( SrcElemTy, Ops.slice(1, i - 1)))) && Ops[i]->getType()->getScalarType() != IntIdxScalarTy) { Any = true; Type *NewType = Ops[i]->getType()->isVectorTy() ? IntIdxTy : IntIdxScalarTy; Constant *NewIdx = ConstantFoldCastOperand( CastInst::getCastOpcode(Ops[i], true, NewType, true), Ops[i], NewType, DL); if (!NewIdx) return nullptr; NewIdxs.push_back(NewIdx); } else NewIdxs.push_back(Ops[i]); } if (!Any) return nullptr; Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs, NW, InRange); return ConstantFoldConstant(C, DL, TLI); } /// If we can symbolically evaluate the GEP constant expression, do so. 
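// Illustrative sketch, not part of this diff: the symbolic binop evaluation
// above is reached through the public ConstantFoldBinaryOpOperands() entry
// point; for example, the pointer difference (&GV + 12) - (&GV + 4) folds to
// the constant 8.
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"

static llvm::Constant *foldSub(llvm::Constant *LHS, llvm::Constant *RHS,
                               const llvm::DataLayout &DL) {
  return llvm::ConstantFoldBinaryOpOperands(llvm::Instruction::Sub, LHS, RHS,
                                            DL);
}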
Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP, ArrayRef Ops, const DataLayout &DL, const TargetLibraryInfo *TLI) { Type *SrcElemTy = GEP->getSourceElementType(); Type *ResTy = GEP->getType(); if (!SrcElemTy->isSized() || isa(SrcElemTy)) return nullptr; if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, GEP->getNoWrapFlags(), GEP->getInRange(), DL, TLI)) return C; Constant *Ptr = Ops[0]; if (!Ptr->getType()->isPointerTy()) return nullptr; Type *IntIdxTy = DL.getIndexType(Ptr->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) if (!isa(Ops[i])) return nullptr; unsigned BitWidth = DL.getTypeSizeInBits(IntIdxTy); APInt Offset = APInt( BitWidth, DL.getIndexedOffsetInType( SrcElemTy, ArrayRef((Value *const *)Ops.data() + 1, Ops.size() - 1))); std::optional InRange = GEP->getInRange(); if (InRange) InRange = InRange->sextOrTrunc(BitWidth); // If this is a GEP of a GEP, fold it all into a single GEP. GEPNoWrapFlags NW = GEP->getNoWrapFlags(); bool Overflow = false; while (auto *GEP = dyn_cast(Ptr)) { NW &= GEP->getNoWrapFlags(); SmallVector NestedOps(llvm::drop_begin(GEP->operands())); // Do not try the incorporate the sub-GEP if some index is not a number. bool AllConstantInt = true; for (Value *NestedOp : NestedOps) if (!isa(NestedOp)) { AllConstantInt = false; break; } if (!AllConstantInt) break; // TODO: Try to intersect two inrange attributes? if (!InRange) { InRange = GEP->getInRange(); if (InRange) // Adjust inrange by offset until now. InRange = InRange->sextOrTrunc(BitWidth).subtract(Offset); } Ptr = cast(GEP->getOperand(0)); SrcElemTy = GEP->getSourceElementType(); Offset = Offset.sadd_ov( APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps)), Overflow); } // Preserving nusw (without inbounds) also requires that the offset // additions did not overflow. if (NW.hasNoUnsignedSignedWrap() && !NW.isInBounds() && Overflow) NW = NW.withoutNoUnsignedSignedWrap(); // If the base value for this address is a literal integer value, fold the // getelementptr to the resulting integer value casted to the pointer type. APInt BasePtr(BitWidth, 0); if (auto *CE = dyn_cast(Ptr)) { if (CE->getOpcode() == Instruction::IntToPtr) { if (auto *Base = dyn_cast(CE->getOperand(0))) BasePtr = Base->getValue().zextOrTrunc(BitWidth); } } auto *PTy = cast(Ptr->getType()); if ((Ptr->isNullValue() || BasePtr != 0) && !DL.isNonIntegralPointerType(PTy)) { Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr); return ConstantExpr::getIntToPtr(C, ResTy); } // Try to infer inbounds for GEPs of globals. // TODO(gep_nowrap): Also infer nuw flag. if (!NW.isInBounds() && Offset.isNonNegative()) { bool CanBeNull, CanBeFreed; uint64_t DerefBytes = Ptr->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed); if (DerefBytes != 0 && !CanBeNull && Offset.sle(DerefBytes)) NW |= GEPNoWrapFlags::inBounds(); } // Otherwise canonicalize this to a single ptradd. LLVMContext &Ctx = Ptr->getContext(); return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ctx), Ptr, ConstantInt::get(Ctx, Offset), NW, InRange); } /// Attempt to constant fold an instruction with the /// specified opcode and operands. If successful, the constant result is /// returned, if not, null is returned. Note that this function can fail when /// attempting to fold instructions like loads and stores, which have no /// constant expression form. 
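// Illustrative sketch, not part of this diff: a constant GEP expression such
// as getelementptr inbounds of element 2 of a [4 x i32] global is
// canonicalized above into a single byte-offset (i8 "ptradd") GEP of +8. The
// public way to trigger that re-evaluation is ConstantFoldConstant().
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"

static llvm::Constant *refoldConstant(llvm::Constant *C,
                                      const llvm::DataLayout &DL) {
  return llvm::ConstantFoldConstant(C, DL);
}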
Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode, ArrayRef Ops, const DataLayout &DL, const TargetLibraryInfo *TLI, bool AllowNonDeterministic) { Type *DestTy = InstOrCE->getType(); if (Instruction::isUnaryOp(Opcode)) return ConstantFoldUnaryOpOperand(Opcode, Ops[0], DL); if (Instruction::isBinaryOp(Opcode)) { switch (Opcode) { default: break; case Instruction::FAdd: case Instruction::FSub: case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: // Handle floating point instructions separately to account for denormals // TODO: If a constant expression is being folded rather than an // instruction, denormals will not be flushed/treated as zero if (const auto *I = dyn_cast(InstOrCE)) { return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I, AllowNonDeterministic); } } return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL); } if (Instruction::isCast(Opcode)) return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL); if (auto *GEP = dyn_cast(InstOrCE)) { Type *SrcElemTy = GEP->getSourceElementType(); if (!ConstantExpr::isSupportedGetElementPtr(SrcElemTy)) return nullptr; if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI)) return C; return ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], Ops.slice(1), GEP->getNoWrapFlags(), GEP->getInRange()); } if (auto *CE = dyn_cast(InstOrCE)) return CE->getWithOperands(Ops); switch (Opcode) { default: return nullptr; case Instruction::ICmp: case Instruction::FCmp: { auto *C = cast(InstOrCE); return ConstantFoldCompareInstOperands(C->getPredicate(), Ops[0], Ops[1], DL, TLI, C); } case Instruction::Freeze: return isGuaranteedNotToBeUndefOrPoison(Ops[0]) ? Ops[0] : nullptr; case Instruction::Call: if (auto *F = dyn_cast(Ops.back())) { const auto *Call = cast(InstOrCE); if (canConstantFoldCallTo(Call, F)) return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI, AllowNonDeterministic); } return nullptr; case Instruction::Select: return ConstantFoldSelectInstruction(Ops[0], Ops[1], Ops[2]); case Instruction::ExtractElement: return ConstantExpr::getExtractElement(Ops[0], Ops[1]); case Instruction::ExtractValue: return ConstantFoldExtractValueInstruction( Ops[0], cast(InstOrCE)->getIndices()); case Instruction::InsertElement: return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); case Instruction::InsertValue: return ConstantFoldInsertValueInstruction( Ops[0], Ops[1], cast(InstOrCE)->getIndices()); case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector( Ops[0], Ops[1], cast(InstOrCE)->getShuffleMask()); case Instruction::Load: { const auto *LI = dyn_cast(InstOrCE); if (LI->isVolatile()) return nullptr; return ConstantFoldLoadFromConstPtr(Ops[0], LI->getType(), DL); } } } } // end anonymous namespace //===----------------------------------------------------------------------===// // Constant Folding public APIs //===----------------------------------------------------------------------===// namespace { Constant * ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI, SmallDenseMap &FoldedOps) { if (!isa(C) && !isa(C)) return const_cast(C); SmallVector Ops; for (const Use &OldU : C->operands()) { Constant *OldC = cast(&OldU); Constant *NewC = OldC; // Recursively fold the ConstantExpr's operands. If we have already folded // a ConstantExpr, we don't have to process it again. 
if (isa(OldC) || isa(OldC)) { auto It = FoldedOps.find(OldC); if (It == FoldedOps.end()) { NewC = ConstantFoldConstantImpl(OldC, DL, TLI, FoldedOps); FoldedOps.insert({OldC, NewC}); } else { NewC = It->second; } } Ops.push_back(NewC); } if (auto *CE = dyn_cast(C)) { if (Constant *Res = ConstantFoldInstOperandsImpl( CE, CE->getOpcode(), Ops, DL, TLI, /*AllowNonDeterministic=*/true)) return Res; return const_cast(C); } assert(isa(C)); return ConstantVector::get(Ops); } } // end anonymous namespace Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL, const TargetLibraryInfo *TLI) { // Handle PHI nodes quickly here... if (auto *PN = dyn_cast(I)) { Constant *CommonValue = nullptr; SmallDenseMap FoldedOps; for (Value *Incoming : PN->incoming_values()) { // If the incoming value is undef then skip it. Note that while we could // skip the value if it is equal to the phi node itself we choose not to // because that would break the rule that constant folding only applies if // all operands are constants. if (isa(Incoming)) continue; // If the incoming value is not a constant, then give up. auto *C = dyn_cast(Incoming); if (!C) return nullptr; // Fold the PHI's operands. C = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps); // If the incoming value is a different constant to // the one we saw previously, then give up. if (CommonValue && C != CommonValue) return nullptr; CommonValue = C; } // If we reach here, all incoming values are the same constant or undef. return CommonValue ? CommonValue : UndefValue::get(PN->getType()); } // Scan the operand list, checking to see if they are all constants, if so, // hand off to ConstantFoldInstOperandsImpl. if (!all_of(I->operands(), [](Use &U) { return isa(U); })) return nullptr; SmallDenseMap FoldedOps; SmallVector Ops; for (const Use &OpU : I->operands()) { auto *Op = cast(&OpU); // Fold the Instruction's operands. Op = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps); Ops.push_back(Op); } return ConstantFoldInstOperands(I, Ops, DL, TLI); } Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI) { SmallDenseMap FoldedOps; return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps); } Constant *llvm::ConstantFoldInstOperands(Instruction *I, ArrayRef Ops, const DataLayout &DL, const TargetLibraryInfo *TLI, bool AllowNonDeterministic) { return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI, AllowNonDeterministic); } Constant *llvm::ConstantFoldCompareInstOperands( unsigned IntPredicate, Constant *Ops0, Constant *Ops1, const DataLayout &DL, const TargetLibraryInfo *TLI, const Instruction *I) { CmpInst::Predicate Predicate = (CmpInst::Predicate)IntPredicate; // fold: icmp (inttoptr x), null -> icmp x, 0 // fold: icmp null, (inttoptr x) -> icmp 0, x // fold: icmp (ptrtoint x), 0 -> icmp x, null // fold: icmp 0, (ptrtoint x) -> icmp null, x // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y // // FIXME: The following comment is out of data and the DataLayout is here now. // ConstantExpr::getCompare cannot do this, because it doesn't have DL // around to know if bit truncation is happening. if (auto *CE0 = dyn_cast(Ops0)) { if (Ops1->isNullValue()) { if (CE0->getOpcode() == Instruction::IntToPtr) { Type *IntPtrTy = DL.getIntPtrType(CE0->getType()); // Convert the integer value to the right size to ensure we get the // proper extension or truncation. 
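// Editorial example (assuming 64-bit pointers): `icmp eq (inttoptr (i32 42 to ptr)), null`
// is folded by first widening the i32 42 to the pointer-sized integer type and then
// evaluating `icmp eq i64 42, 0`, which yields false.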
if (Constant *C = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy, /*IsSigned*/ false, DL)) { Constant *Null = Constant::getNullValue(C->getType()); return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI); } } // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. if (CE0->getOpcode() == Instruction::PtrToInt) { Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType()); if (CE0->getType() == IntPtrTy) { Constant *C = CE0->getOperand(0); Constant *Null = Constant::getNullValue(C->getType()); return ConstantFoldCompareInstOperands(Predicate, C, Null, DL, TLI); } } } if (auto *CE1 = dyn_cast(Ops1)) { if (CE0->getOpcode() == CE1->getOpcode()) { if (CE0->getOpcode() == Instruction::IntToPtr) { Type *IntPtrTy = DL.getIntPtrType(CE0->getType()); // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C0 = ConstantFoldIntegerCast(CE0->getOperand(0), IntPtrTy, /*IsSigned*/ false, DL); Constant *C1 = ConstantFoldIntegerCast(CE1->getOperand(0), IntPtrTy, /*IsSigned*/ false, DL); if (C0 && C1) return ConstantFoldCompareInstOperands(Predicate, C0, C1, DL, TLI); } // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. if (CE0->getOpcode() == Instruction::PtrToInt) { Type *IntPtrTy = DL.getIntPtrType(CE0->getOperand(0)->getType()); if (CE0->getType() == IntPtrTy && CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) { return ConstantFoldCompareInstOperands( Predicate, CE0->getOperand(0), CE1->getOperand(0), DL, TLI); } } } } // Convert pointer comparison (base+offset1) pred (base+offset2) into // offset1 pred offset2, for the case where the offset is inbounds. This // only works for equality and unsigned comparison, as inbounds permits // crossing the sign boundary. However, the offset comparison itself is // signed. if (Ops0->getType()->isPointerTy() && !ICmpInst::isSigned(Predicate)) { unsigned IndexWidth = DL.getIndexTypeSizeInBits(Ops0->getType()); APInt Offset0(IndexWidth, 0); Value *Stripped0 = Ops0->stripAndAccumulateInBoundsConstantOffsets(DL, Offset0); APInt Offset1(IndexWidth, 0); Value *Stripped1 = Ops1->stripAndAccumulateInBoundsConstantOffsets(DL, Offset1); if (Stripped0 == Stripped1) return ConstantInt::getBool( Ops0->getContext(), ICmpInst::compare(Offset0, Offset1, ICmpInst::getSignedPredicate(Predicate))); } } else if (isa(Ops1)) { // If RHS is a constant expression, but the left side isn't, swap the // operands and try again. Predicate = ICmpInst::getSwappedPredicate(Predicate); return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI); } // Flush any denormal constant float input according to denormal handling // mode. 
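// Editorial note: FlushFPConstant below consults the function's "denormal-fp-math"
// attribute; e.g. under a preserve-sign input mode a denormal such as float 0x1p-149
// is replaced by a correctly signed zero before the comparison is evaluated.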
Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false); if (!Ops0) return nullptr; Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false); if (!Ops1) return nullptr; return ConstantFoldCompareInstruction(Predicate, Ops0, Ops1); } Constant *llvm::ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op, const DataLayout &DL) { assert(Instruction::isUnaryOp(Opcode)); return ConstantFoldUnaryInstruction(Opcode, Op); } Constant *llvm::ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL) { assert(Instruction::isBinaryOp(Opcode)); if (isa(LHS) || isa(RHS)) if (Constant *C = SymbolicallyEvaluateBinop(Opcode, LHS, RHS, DL)) return C; if (ConstantExpr::isDesirableBinOp(Opcode)) return ConstantExpr::get(Opcode, LHS, RHS); return ConstantFoldBinaryInstruction(Opcode, LHS, RHS); } Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I, bool IsOutput) { if (!I || !I->getParent() || !I->getFunction()) return Operand; ConstantFP *CFP = dyn_cast(Operand); if (!CFP) return Operand; const APFloat &APF = CFP->getValueAPF(); // TODO: Should this canonicalize nans? if (!APF.isDenormal()) return Operand; Type *Ty = CFP->getType(); DenormalMode DenormMode = I->getFunction()->getDenormalMode(Ty->getFltSemantics()); DenormalMode::DenormalModeKind Mode = IsOutput ? DenormMode.Output : DenormMode.Input; switch (Mode) { default: llvm_unreachable("unknown denormal mode"); case DenormalMode::Dynamic: return nullptr; case DenormalMode::IEEE: return Operand; case DenormalMode::PreserveSign: if (APF.isDenormal()) { return ConstantFP::get( Ty->getContext(), APFloat::getZero(Ty->getFltSemantics(), APF.isNegative())); } return Operand; case DenormalMode::PositiveZero: if (APF.isDenormal()) { return ConstantFP::get(Ty->getContext(), APFloat::getZero(Ty->getFltSemantics(), false)); } return Operand; } return Operand; } Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL, const Instruction *I, bool AllowNonDeterministic) { if (Instruction::isBinaryOp(Opcode)) { // Flush denormal inputs if needed. Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false); if (!Op0) return nullptr; Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false); if (!Op1) return nullptr; // If nsz or an algebraic FMF flag is set, the result of the FP operation // may change due to future optimization. Don't constant fold them if // non-deterministic results are not allowed. if (!AllowNonDeterministic) if (auto *FP = dyn_cast_or_null(I)) if (FP->hasNoSignedZeros() || FP->hasAllowReassoc() || FP->hasAllowContract() || FP->hasAllowReciprocal()) return nullptr; // Calculate constant result. Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL); if (!C) return nullptr; // Flush denormal output if needed. C = FlushFPConstant(C, I, /* IsOutput */ true); if (!C) return nullptr; // The precise NaN value is non-deterministic. if (!AllowNonDeterministic && C->isNaN()) return nullptr; return C; } // If instruction lacks a parent/function and the denormal mode cannot be // determined, use the default (IEEE). return ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL); } Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL) { assert(Instruction::isCast(Opcode)); switch (Opcode) { default: llvm_unreachable("Missing case"); case Instruction::PtrToInt: if (auto *CE = dyn_cast(C)) { Constant *FoldedValue = nullptr; // If the input is a inttoptr, eliminate the pair. 
This requires knowing // the width of a pointer, so it can't be done in ConstantExpr::getCast. if (CE->getOpcode() == Instruction::IntToPtr) { // zext/trunc the inttoptr to pointer size. FoldedValue = ConstantFoldIntegerCast(CE->getOperand(0), DL.getIntPtrType(CE->getType()), /*IsSigned=*/false, DL); } else if (auto *GEP = dyn_cast(CE)) { // If we have GEP, we can perform the following folds: // (ptrtoint (gep null, x)) -> x // (ptrtoint (gep (gep null, x), y) -> x + y, etc. unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); APInt BaseOffset(BitWidth, 0); auto *Base = cast(GEP->stripAndAccumulateConstantOffsets( DL, BaseOffset, /*AllowNonInbounds=*/true)); if (Base->isNullValue()) { FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset); } else { // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V if (GEP->getNumIndices() == 1 && GEP->getSourceElementType()->isIntegerTy(8)) { auto *Ptr = cast(GEP->getPointerOperand()); auto *Sub = dyn_cast(GEP->getOperand(1)); Type *IntIdxTy = DL.getIndexType(Ptr->getType()); if (Sub && Sub->getType() == IntIdxTy && Sub->getOpcode() == Instruction::Sub && Sub->getOperand(0)->isNullValue()) FoldedValue = ConstantExpr::getSub( ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1)); } } } if (FoldedValue) { // Do a zext or trunc to get to the ptrtoint dest size. return ConstantFoldIntegerCast(FoldedValue, DestTy, /*IsSigned=*/false, DL); } } break; case Instruction::IntToPtr: // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if // the int size is >= the ptr size and the address spaces are the same. // This requires knowing the width of a pointer, so it can't be done in // ConstantExpr::getCast. if (auto *CE = dyn_cast(C)) { if (CE->getOpcode() == Instruction::PtrToInt) { Constant *SrcPtr = CE->getOperand(0); unsigned SrcPtrSize = DL.getPointerTypeSizeInBits(SrcPtr->getType()); unsigned MidIntSize = CE->getType()->getScalarSizeInBits(); if (MidIntSize >= SrcPtrSize) { unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace(); if (SrcAS == DestTy->getPointerAddressSpace()) return FoldBitCast(CE->getOperand(0), DestTy, DL); } } } break; case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::UIToFP: case Instruction::SIToFP: case Instruction::FPToUI: case Instruction::FPToSI: case Instruction::AddrSpaceCast: break; case Instruction::BitCast: return FoldBitCast(C, DestTy, DL); } if (ConstantExpr::isDesirableCastOp(Opcode)) return ConstantExpr::getCast(Opcode, C, DestTy); return ConstantFoldCastInstruction(Opcode, C, DestTy); } Constant *llvm::ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL) { Type *SrcTy = C->getType(); if (SrcTy == DestTy) return C; if (SrcTy->getScalarSizeInBits() > DestTy->getScalarSizeInBits()) return ConstantFoldCastOperand(Instruction::Trunc, C, DestTy, DL); if (IsSigned) return ConstantFoldCastOperand(Instruction::SExt, C, DestTy, DL); return ConstantFoldCastOperand(Instruction::ZExt, C, DestTy, DL); } //===----------------------------------------------------------------------===// // Constant Folding for Calls // bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { if (Call->isNoBuiltin()) return false; if (Call->getFunctionType() != F->getFunctionType()) return false; switch (F->getIntrinsicID()) { // Operations that do not operate floating-point numbers and do not depend on // FP environment can be folded even in strictfp 
functions. case Intrinsic::bswap: case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: case Intrinsic::fshl: case Intrinsic::fshr: case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::masked_load: case Intrinsic::get_active_lane_mask: case Intrinsic::abs: case Intrinsic::smax: case Intrinsic::smin: case Intrinsic::umax: case Intrinsic::umin: case Intrinsic::scmp: case Intrinsic::ucmp: case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::usub_with_overflow: case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: case Intrinsic::sadd_sat: case Intrinsic::uadd_sat: case Intrinsic::ssub_sat: case Intrinsic::usub_sat: case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: case Intrinsic::bitreverse: case Intrinsic::is_constant: case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_mul: case Intrinsic::vector_reduce_and: case Intrinsic::vector_reduce_or: case Intrinsic::vector_reduce_xor: case Intrinsic::vector_reduce_smin: case Intrinsic::vector_reduce_smax: case Intrinsic::vector_reduce_umin: case Intrinsic::vector_reduce_umax: // Target intrinsics case Intrinsic::amdgcn_perm: case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: case Intrinsic::amdgcn_s_wqm: case Intrinsic::amdgcn_s_quadmask: case Intrinsic::amdgcn_s_bitreplicate: case Intrinsic::arm_mve_vctp8: case Intrinsic::arm_mve_vctp16: case Intrinsic::arm_mve_vctp32: case Intrinsic::arm_mve_vctp64: case Intrinsic::aarch64_sve_convert_from_svbool: // WebAssembly float semantics are always known case Intrinsic::wasm_trunc_signed: case Intrinsic::wasm_trunc_unsigned: return true; // Floating point operations cannot be folded in strictfp functions in // general case. They can be folded if FP environment is known to compiler. case Intrinsic::minnum: case Intrinsic::maxnum: case Intrinsic::minimum: case Intrinsic::maximum: case Intrinsic::log: case Intrinsic::log2: case Intrinsic::log10: case Intrinsic::exp: case Intrinsic::exp2: case Intrinsic::exp10: case Intrinsic::sqrt: case Intrinsic::sin: case Intrinsic::cos: case Intrinsic::pow: case Intrinsic::powi: case Intrinsic::ldexp: case Intrinsic::fma: case Intrinsic::fmuladd: case Intrinsic::frexp: case Intrinsic::fptoui_sat: case Intrinsic::fptosi_sat: case Intrinsic::convert_from_fp16: case Intrinsic::convert_to_fp16: case Intrinsic::amdgcn_cos: case Intrinsic::amdgcn_cubeid: case Intrinsic::amdgcn_cubema: case Intrinsic::amdgcn_cubesc: case Intrinsic::amdgcn_cubetc: case Intrinsic::amdgcn_fmul_legacy: case Intrinsic::amdgcn_fma_legacy: case Intrinsic::amdgcn_fract: case Intrinsic::amdgcn_sin: // The intrinsics below depend on rounding mode in MXCSR. 
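// Editorial note: these conversions honour the current MXCSR rounding field, which is
// why the code below returns !Call->isStrictFP() for them: they are only folded when
// the default round-to-nearest-even environment may be assumed.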
case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: case Intrinsic::x86_sse_cvttss2si: case Intrinsic::x86_sse_cvttss2si64: case Intrinsic::x86_sse2_cvtsd2si: case Intrinsic::x86_sse2_cvtsd2si64: case Intrinsic::x86_sse2_cvttsd2si: case Intrinsic::x86_sse2_cvttsd2si64: case Intrinsic::x86_avx512_vcvtss2si32: case Intrinsic::x86_avx512_vcvtss2si64: case Intrinsic::x86_avx512_cvttss2si: case Intrinsic::x86_avx512_cvttss2si64: case Intrinsic::x86_avx512_vcvtsd2si32: case Intrinsic::x86_avx512_vcvtsd2si64: case Intrinsic::x86_avx512_cvttsd2si: case Intrinsic::x86_avx512_cvttsd2si64: case Intrinsic::x86_avx512_vcvtss2usi32: case Intrinsic::x86_avx512_vcvtss2usi64: case Intrinsic::x86_avx512_cvttss2usi: case Intrinsic::x86_avx512_cvttss2usi64: case Intrinsic::x86_avx512_vcvtsd2usi32: case Intrinsic::x86_avx512_vcvtsd2usi64: case Intrinsic::x86_avx512_cvttsd2usi: case Intrinsic::x86_avx512_cvttsd2usi64: return !Call->isStrictFP(); // Sign operations are actually bitwise operations, they do not raise // exceptions even for SNANs. case Intrinsic::fabs: case Intrinsic::copysign: case Intrinsic::is_fpclass: // Non-constrained variants of rounding operations means default FP // environment, they can be folded in any case. case Intrinsic::ceil: case Intrinsic::floor: case Intrinsic::round: case Intrinsic::roundeven: case Intrinsic::trunc: case Intrinsic::nearbyint: case Intrinsic::rint: case Intrinsic::canonicalize: // Constrained intrinsics can be folded if FP environment is known // to compiler. case Intrinsic::experimental_constrained_fma: case Intrinsic::experimental_constrained_fmuladd: case Intrinsic::experimental_constrained_fadd: case Intrinsic::experimental_constrained_fsub: case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_roundeven: case Intrinsic::experimental_constrained_trunc: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_fcmp: case Intrinsic::experimental_constrained_fcmps: return true; default: return false; case Intrinsic::not_intrinsic: break; } if (!F->hasName() || Call->isStrictFP()) return false; // In these cases, the check of the length is required. We don't want to // return true for a name like "cos\0blah" which strcmp would return equal to // "cos", but has length 8. 
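// Editorial note: the Name == "..." comparisons below are StringRef equality, which
// checks both length and contents, so a name like "cos\0blah" does not match "cos".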
StringRef Name = F->getName(); switch (Name[0]) { default: return false; case 'a': return Name == "acos" || Name == "acosf" || Name == "asin" || Name == "asinf" || Name == "atan" || Name == "atanf" || Name == "atan2" || Name == "atan2f"; case 'c': return Name == "ceil" || Name == "ceilf" || Name == "cos" || Name == "cosf" || Name == "cosh" || Name == "coshf"; case 'e': return Name == "exp" || Name == "expf" || Name == "exp2" || Name == "exp2f"; case 'f': return Name == "fabs" || Name == "fabsf" || Name == "floor" || Name == "floorf" || Name == "fmod" || Name == "fmodf"; case 'l': return Name == "log" || Name == "logf" || Name == "log2" || Name == "log2f" || Name == "log10" || Name == "log10f" || Name == "logl"; case 'n': return Name == "nearbyint" || Name == "nearbyintf"; case 'p': return Name == "pow" || Name == "powf"; case 'r': return Name == "remainder" || Name == "remainderf" || Name == "rint" || Name == "rintf" || Name == "round" || Name == "roundf"; case 's': return Name == "sin" || Name == "sinf" || Name == "sinh" || Name == "sinhf" || Name == "sqrt" || Name == "sqrtf"; case 't': return Name == "tan" || Name == "tanf" || Name == "tanh" || Name == "tanhf" || Name == "trunc" || Name == "truncf"; case '_': // Check for various function names that get used for the math functions // when the header files are preprocessed with the macro // __FINITE_MATH_ONLY__ enabled. // The '12' here is the length of the shortest name that can match. // We need to check the size before looking at Name[1] and Name[2] // so we may as well check a limit that will eliminate mismatches. if (Name.size() < 12 || Name[1] != '_') return false; switch (Name[2]) { default: return false; case 'a': return Name == "__acos_finite" || Name == "__acosf_finite" || Name == "__asin_finite" || Name == "__asinf_finite" || Name == "__atan2_finite" || Name == "__atan2f_finite"; case 'c': return Name == "__cosh_finite" || Name == "__coshf_finite"; case 'e': return Name == "__exp_finite" || Name == "__expf_finite" || Name == "__exp2_finite" || Name == "__exp2f_finite"; case 'l': return Name == "__log_finite" || Name == "__logf_finite" || Name == "__log10_finite" || Name == "__log10f_finite"; case 'p': return Name == "__pow_finite" || Name == "__powf_finite"; case 's': return Name == "__sinh_finite" || Name == "__sinhf_finite"; } } } namespace { Constant *GetConstantFoldFPValue(double V, Type *Ty) { if (Ty->isHalfTy() || Ty->isFloatTy()) { APFloat APF(V); bool unused; APF.convert(Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &unused); return ConstantFP::get(Ty->getContext(), APF); } if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); llvm_unreachable("Can only constant fold half/float/double"); } #if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128) Constant *GetConstantFoldFPValue128(float128 V, Type *Ty) { if (Ty->isFP128Ty()) return ConstantFP::get(Ty, V); llvm_unreachable("Can only constant fold fp128"); } #endif /// Clear the floating-point exception state. inline void llvm_fenv_clearexcept() { #if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT feclearexcept(FE_ALL_EXCEPT); #endif errno = 0; } /// Test if a floating-point exception was raised. 
inline bool llvm_fenv_testexcept() { int errno_val = errno; if (errno_val == ERANGE || errno_val == EDOM) return true; #if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT)) return true; #endif return false; } Constant *ConstantFoldFP(double (*NativeFP)(double), const APFloat &V, Type *Ty) { llvm_fenv_clearexcept(); double Result = NativeFP(V.convertToDouble()); if (llvm_fenv_testexcept()) { llvm_fenv_clearexcept(); return nullptr; } return GetConstantFoldFPValue(Result, Ty); } #if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128) -Constant *ConstantFoldFP128(long double (*NativeFP)(long double), - const APFloat &V, Type *Ty) { +Constant *ConstantFoldFP128(float128 (*NativeFP)(float128), const APFloat &V, + Type *Ty) { llvm_fenv_clearexcept(); float128 Result = NativeFP(V.convertToQuad()); if (llvm_fenv_testexcept()) { llvm_fenv_clearexcept(); return nullptr; } return GetConstantFoldFPValue128(Result, Ty); } #endif Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), const APFloat &V, const APFloat &W, Type *Ty) { llvm_fenv_clearexcept(); double Result = NativeFP(V.convertToDouble(), W.convertToDouble()); if (llvm_fenv_testexcept()) { llvm_fenv_clearexcept(); return nullptr; } return GetConstantFoldFPValue(Result, Ty); } Constant *constantFoldVectorReduce(Intrinsic::ID IID, Constant *Op) { FixedVectorType *VT = dyn_cast(Op->getType()); if (!VT) return nullptr; // This isn't strictly necessary, but handle the special/common case of zero: // all integer reductions of a zero input produce zero. if (isa(Op)) return ConstantInt::get(VT->getElementType(), 0); // This is the same as the underlying binops - poison propagates. if (isa(Op) || Op->containsPoisonElement()) return PoisonValue::get(VT->getElementType()); // TODO: Handle undef. if (!isa(Op) && !isa(Op)) return nullptr; auto *EltC = dyn_cast(Op->getAggregateElement(0U)); if (!EltC) return nullptr; APInt Acc = EltC->getValue(); for (unsigned I = 1, E = VT->getNumElements(); I != E; I++) { if (!(EltC = dyn_cast(Op->getAggregateElement(I)))) return nullptr; const APInt &X = EltC->getValue(); switch (IID) { case Intrinsic::vector_reduce_add: Acc = Acc + X; break; case Intrinsic::vector_reduce_mul: Acc = Acc * X; break; case Intrinsic::vector_reduce_and: Acc = Acc & X; break; case Intrinsic::vector_reduce_or: Acc = Acc | X; break; case Intrinsic::vector_reduce_xor: Acc = Acc ^ X; break; case Intrinsic::vector_reduce_smin: Acc = APIntOps::smin(Acc, X); break; case Intrinsic::vector_reduce_smax: Acc = APIntOps::smax(Acc, X); break; case Intrinsic::vector_reduce_umin: Acc = APIntOps::umin(Acc, X); break; case Intrinsic::vector_reduce_umax: Acc = APIntOps::umax(Acc, X); break; } } return ConstantInt::get(Op->getContext(), Acc); } /// Attempt to fold an SSE floating point to integer conversion of a constant /// floating point. If roundTowardZero is false, the default IEEE rounding is /// used (toward nearest, ties to even). This matches the behavior of the /// non-truncating SSE instructions in the default rounding mode. The desired /// integer type Ty is used to select how many bits are available for the /// result. Returns null if the conversion cannot be performed, otherwise /// returns the Constant value resulting from the conversion. Constant *ConstantFoldSSEConvertToInt(const APFloat &Val, bool roundTowardZero, Type *Ty, bool IsSigned) { // All of these conversion intrinsics form an integer of at most 64bits. 
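// Editorial example (not from upstream): with round-to-nearest-even, cvtss2si(2.7f)
// yields 3 and cvtss2si(2.5f) yields 2 (ties to even), whereas the truncating
// cvttss2si(2.7f) yields 2.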
unsigned ResultWidth = Ty->getIntegerBitWidth(); assert(ResultWidth <= 64 && "Can only constant fold conversions to 64 and 32 bit ints"); uint64_t UIntVal; bool isExact = false; APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero : APFloat::rmNearestTiesToEven; APFloat::opStatus status = Val.convertToInteger(MutableArrayRef(UIntVal), ResultWidth, IsSigned, mode, &isExact); if (status != APFloat::opOK && (!roundTowardZero || status != APFloat::opInexact)) return nullptr; return ConstantInt::get(Ty, UIntVal, IsSigned); } double getValueAsDouble(ConstantFP *Op) { Type *Ty = Op->getType(); if (Ty->isBFloatTy() || Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) return Op->getValueAPF().convertToDouble(); bool unused; APFloat APF = Op->getValueAPF(); APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused); return APF.convertToDouble(); } static bool getConstIntOrUndef(Value *Op, const APInt *&C) { if (auto *CI = dyn_cast(Op)) { C = &CI->getValue(); return true; } if (isa(Op)) { C = nullptr; return true; } return false; } /// Checks if the given intrinsic call, which evaluates to constant, is allowed /// to be folded. /// /// \param CI Constrained intrinsic call. /// \param St Exception flags raised during constant evaluation. static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI, APFloat::opStatus St) { std::optional ORM = CI->getRoundingMode(); std::optional EB = CI->getExceptionBehavior(); // If the operation does not change exception status flags, it is safe // to fold. if (St == APFloat::opStatus::opOK) return true; // If evaluation raised FP exception, the result can depend on rounding // mode. If the latter is unknown, folding is not possible. if (ORM && *ORM == RoundingMode::Dynamic) return false; // If FP exceptions are ignored, fold the call, even if such exception is // raised. if (EB && *EB != fp::ExceptionBehavior::ebStrict) return true; // Leave the calculation for runtime so that exception flags be correctly set // in hardware. return false; } /// Returns the rounding mode that should be used for constant evaluation. static RoundingMode getEvaluationRoundingMode(const ConstrainedFPIntrinsic *CI) { std::optional ORM = CI->getRoundingMode(); if (!ORM || *ORM == RoundingMode::Dynamic) // Even if the rounding mode is unknown, try evaluating the operation. // If it does not raise inexact exception, rounding was not applied, // so the result is exact and does not depend on rounding mode. Whether // other FP exceptions are raised, it does not depend on rounding mode. return RoundingMode::NearestTiesToEven; return *ORM; } /// Try to constant fold llvm.canonicalize for the given caller and value. static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI, const APFloat &Src) { // Zero, positive and negative, is always OK to fold. if (Src.isZero()) { // Get a fresh 0, since ppc_fp128 does have non-canonical zeros. return ConstantFP::get( CI->getContext(), APFloat::getZero(Src.getSemantics(), Src.isNegative())); } if (!Ty->isIEEELikeFPTy()) return nullptr; // Zero is always canonical and the sign must be preserved. // // Denorms and nans may have special encodings, but it should be OK to fold a // totally average number. 
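// Editorial example: llvm.canonicalize.f32(1.0) folds to 1.0 and canonicalize(-0.0)
// folds to a fresh -0.0; denormal inputs are only folded below when the function's
// denormal mode is known.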
if (Src.isNormal() || Src.isInfinity()) return ConstantFP::get(CI->getContext(), Src); if (Src.isDenormal() && CI->getParent() && CI->getFunction()) { DenormalMode DenormMode = CI->getFunction()->getDenormalMode(Src.getSemantics()); if (DenormMode == DenormalMode::getIEEE()) return ConstantFP::get(CI->getContext(), Src); if (DenormMode.Input == DenormalMode::Dynamic) return nullptr; // If we know if either input or output is flushed, we can fold. if ((DenormMode.Input == DenormalMode::Dynamic && DenormMode.Output == DenormalMode::IEEE) || (DenormMode.Input == DenormalMode::IEEE && DenormMode.Output == DenormalMode::Dynamic)) return nullptr; bool IsPositive = (!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero || (DenormMode.Output == DenormalMode::PositiveZero && DenormMode.Input == DenormalMode::IEEE)); return ConstantFP::get(CI->getContext(), APFloat::getZero(Src.getSemantics(), !IsPositive)); } return nullptr; } static Constant *ConstantFoldScalarCall1(StringRef Name, Intrinsic::ID IntrinsicID, Type *Ty, ArrayRef Operands, const TargetLibraryInfo *TLI, const CallBase *Call) { assert(Operands.size() == 1 && "Wrong number of operands."); if (IntrinsicID == Intrinsic::is_constant) { // We know we have a "Constant" argument. But we want to only // return true for manifest constants, not those that depend on // constants with unknowable values, e.g. GlobalValue or BlockAddress. if (Operands[0]->isManifestConstant()) return ConstantInt::getTrue(Ty->getContext()); return nullptr; } if (isa(Operands[0])) { // TODO: All of these operations should probably propagate poison. if (IntrinsicID == Intrinsic::canonicalize) return PoisonValue::get(Ty); } if (isa(Operands[0])) { // cosine(arg) is between -1 and 1. cosine(invalid arg) is NaN. // ctpop() is between 0 and bitwidth, pick 0 for undef. // fptoui.sat and fptosi.sat can always fold to zero (for a zero input). if (IntrinsicID == Intrinsic::cos || IntrinsicID == Intrinsic::ctpop || IntrinsicID == Intrinsic::fptoui_sat || IntrinsicID == Intrinsic::fptosi_sat || IntrinsicID == Intrinsic::canonicalize) return Constant::getNullValue(Ty); if (IntrinsicID == Intrinsic::bswap || IntrinsicID == Intrinsic::bitreverse || IntrinsicID == Intrinsic::launder_invariant_group || IntrinsicID == Intrinsic::strip_invariant_group) return Operands[0]; } if (isa(Operands[0])) { // launder(null) == null == strip(null) iff in addrspace 0 if (IntrinsicID == Intrinsic::launder_invariant_group || IntrinsicID == Intrinsic::strip_invariant_group) { // If instruction is not yet put in a basic block (e.g. when cloning // a function during inlining), Call's caller may not be available. // So check Call's BB first before querying Call->getCaller. const Function *Caller = Call->getParent() ? 
Call->getCaller() : nullptr; if (Caller && !NullPointerIsDefined( Caller, Operands[0]->getType()->getPointerAddressSpace())) { return Operands[0]; } return nullptr; } } if (auto *Op = dyn_cast(Operands[0])) { if (IntrinsicID == Intrinsic::convert_to_fp16) { APFloat Val(Op->getValueAPF()); bool lost = false; Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost); return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt()); } APFloat U = Op->getValueAPF(); if (IntrinsicID == Intrinsic::wasm_trunc_signed || IntrinsicID == Intrinsic::wasm_trunc_unsigned) { bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed; if (U.isNaN()) return nullptr; unsigned Width = Ty->getIntegerBitWidth(); APSInt Int(Width, !Signed); bool IsExact = false; APFloat::opStatus Status = U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); if (Status == APFloat::opOK || Status == APFloat::opInexact) return ConstantInt::get(Ty, Int); return nullptr; } if (IntrinsicID == Intrinsic::fptoui_sat || IntrinsicID == Intrinsic::fptosi_sat) { // convertToInteger() already has the desired saturation semantics. APSInt Int(Ty->getIntegerBitWidth(), IntrinsicID == Intrinsic::fptoui_sat); bool IsExact; U.convertToInteger(Int, APFloat::rmTowardZero, &IsExact); return ConstantInt::get(Ty, Int); } if (IntrinsicID == Intrinsic::canonicalize) return constantFoldCanonicalize(Ty, Call, U); #if defined(HAS_IEE754_FLOAT128) && defined(HAS_LOGF128) if (Ty->isFP128Ty()) { if (IntrinsicID == Intrinsic::log) { float128 Result = logf128(Op->getValueAPF().convertToQuad()); return GetConstantFoldFPValue128(Result, Ty); } LibFunc Fp128Func = NotLibFunc; if (TLI->getLibFunc(Name, Fp128Func) && TLI->has(Fp128Func) && Fp128Func == LibFunc_logl) return ConstantFoldFP128(logf128, Op->getValueAPF(), Ty); } #endif if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return nullptr; // Use internal versions of these intrinsics. if (IntrinsicID == Intrinsic::nearbyint || IntrinsicID == Intrinsic::rint) { U.roundToIntegral(APFloat::rmNearestTiesToEven); return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::round) { U.roundToIntegral(APFloat::rmNearestTiesToAway); return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::roundeven) { U.roundToIntegral(APFloat::rmNearestTiesToEven); return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::ceil) { U.roundToIntegral(APFloat::rmTowardPositive); return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::floor) { U.roundToIntegral(APFloat::rmTowardNegative); return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::trunc) { U.roundToIntegral(APFloat::rmTowardZero); return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::fabs) { U.clearSign(); return ConstantFP::get(Ty->getContext(), U); } if (IntrinsicID == Intrinsic::amdgcn_fract) { // The v_fract instruction behaves like the OpenCL spec, which defines // fract(x) as fmin(x - floor(x), 0x1.fffffep-1f): "The min() operator is // there to prevent fract(-small) from returning 1.0. It returns the // largest positive floating-point number less than 1.0." 
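// Editorial example: fract(2.75) = min(2.75 - floor(2.75), 0x1.fffffep-1f) = 0.75,
// and for a tiny negative input the min() clamps the result to 0x1.fffffep-1f (the
// largest float below 1.0) rather than letting it reach 1.0.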
APFloat FloorU(U); FloorU.roundToIntegral(APFloat::rmTowardNegative); APFloat FractU(U - FloorU); APFloat AlmostOne(U.getSemantics(), 1); AlmostOne.next(/*nextDown*/ true); return ConstantFP::get(Ty->getContext(), minimum(FractU, AlmostOne)); } // Rounding operations (floor, trunc, ceil, round and nearbyint) do not // raise FP exceptions, unless the argument is signaling NaN. std::optional RM; switch (IntrinsicID) { default: break; case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_rint: { auto CI = cast(Call); RM = CI->getRoundingMode(); if (!RM || *RM == RoundingMode::Dynamic) return nullptr; break; } case Intrinsic::experimental_constrained_round: RM = APFloat::rmNearestTiesToAway; break; case Intrinsic::experimental_constrained_ceil: RM = APFloat::rmTowardPositive; break; case Intrinsic::experimental_constrained_floor: RM = APFloat::rmTowardNegative; break; case Intrinsic::experimental_constrained_trunc: RM = APFloat::rmTowardZero; break; } if (RM) { auto CI = cast(Call); if (U.isFinite()) { APFloat::opStatus St = U.roundToIntegral(*RM); if (IntrinsicID == Intrinsic::experimental_constrained_rint && St == APFloat::opInexact) { std::optional EB = CI->getExceptionBehavior(); if (EB && *EB == fp::ebStrict) return nullptr; } } else if (U.isSignaling()) { std::optional EB = CI->getExceptionBehavior(); if (EB && *EB != fp::ebIgnore) return nullptr; U = APFloat::getQNaN(U.getSemantics()); } return ConstantFP::get(Ty->getContext(), U); } /// We only fold functions with finite arguments. Folding NaN and inf is /// likely to be aborted with an exception anyway, and some host libms /// have known errors raising exceptions. if (!U.isFinite()) return nullptr; /// Currently APFloat versions of these functions do not exist, so we use /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == /// f(arg). Long double not supported yet. const APFloat &APF = Op->getValueAPF(); switch (IntrinsicID) { default: break; case Intrinsic::log: return ConstantFoldFP(log, APF, Ty); case Intrinsic::log2: // TODO: What about hosts that lack a C99 library? return ConstantFoldFP(log2, APF, Ty); case Intrinsic::log10: // TODO: What about hosts that lack a C99 library? return ConstantFoldFP(log10, APF, Ty); case Intrinsic::exp: return ConstantFoldFP(exp, APF, Ty); case Intrinsic::exp2: // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty); case Intrinsic::exp10: // Fold exp10(x) as pow(10, x), in case the host lacks a C99 library. return ConstantFoldBinaryFP(pow, APFloat(10.0), APF, Ty); case Intrinsic::sin: return ConstantFoldFP(sin, APF, Ty); case Intrinsic::cos: return ConstantFoldFP(cos, APF, Ty); case Intrinsic::sqrt: return ConstantFoldFP(sqrt, APF, Ty); case Intrinsic::amdgcn_cos: case Intrinsic::amdgcn_sin: { double V = getValueAsDouble(Op); if (V < -256.0 || V > 256.0) // The gfx8 and gfx9 architectures handle arguments outside the range // [-256, 256] differently. This should be a rare case so bail out // rather than trying to handle the difference. return nullptr; bool IsCos = IntrinsicID == Intrinsic::amdgcn_cos; double V4 = V * 4.0; if (V4 == floor(V4)) { // Force exact results for quarter-integer inputs. const double SinVals[4] = { 0.0, 1.0, 0.0, -1.0 }; V = SinVals[((int)V4 + (IsCos ? 
1 : 0)) & 3]; } else { if (IsCos) V = cos(V * 2.0 * numbers::pi); else V = sin(V * 2.0 * numbers::pi); } return GetConstantFoldFPValue(V, Ty); } } if (!TLI) return nullptr; LibFunc Func = NotLibFunc; if (!TLI->getLibFunc(Name, Func)) return nullptr; switch (Func) { default: break; case LibFunc_acos: case LibFunc_acosf: case LibFunc_acos_finite: case LibFunc_acosf_finite: if (TLI->has(Func)) return ConstantFoldFP(acos, APF, Ty); break; case LibFunc_asin: case LibFunc_asinf: case LibFunc_asin_finite: case LibFunc_asinf_finite: if (TLI->has(Func)) return ConstantFoldFP(asin, APF, Ty); break; case LibFunc_atan: case LibFunc_atanf: if (TLI->has(Func)) return ConstantFoldFP(atan, APF, Ty); break; case LibFunc_ceil: case LibFunc_ceilf: if (TLI->has(Func)) { U.roundToIntegral(APFloat::rmTowardPositive); return ConstantFP::get(Ty->getContext(), U); } break; case LibFunc_cos: case LibFunc_cosf: if (TLI->has(Func)) return ConstantFoldFP(cos, APF, Ty); break; case LibFunc_cosh: case LibFunc_coshf: case LibFunc_cosh_finite: case LibFunc_coshf_finite: if (TLI->has(Func)) return ConstantFoldFP(cosh, APF, Ty); break; case LibFunc_exp: case LibFunc_expf: case LibFunc_exp_finite: case LibFunc_expf_finite: if (TLI->has(Func)) return ConstantFoldFP(exp, APF, Ty); break; case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2_finite: case LibFunc_exp2f_finite: if (TLI->has(Func)) // Fold exp2(x) as pow(2, x), in case the host lacks a C99 library. return ConstantFoldBinaryFP(pow, APFloat(2.0), APF, Ty); break; case LibFunc_fabs: case LibFunc_fabsf: if (TLI->has(Func)) { U.clearSign(); return ConstantFP::get(Ty->getContext(), U); } break; case LibFunc_floor: case LibFunc_floorf: if (TLI->has(Func)) { U.roundToIntegral(APFloat::rmTowardNegative); return ConstantFP::get(Ty->getContext(), U); } break; case LibFunc_log: case LibFunc_logf: case LibFunc_log_finite: case LibFunc_logf_finite: if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) return ConstantFoldFP(log, APF, Ty); break; case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2_finite: case LibFunc_log2f_finite: if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) // TODO: What about hosts that lack a C99 library? return ConstantFoldFP(log2, APF, Ty); break; case LibFunc_log10: case LibFunc_log10f: case LibFunc_log10_finite: case LibFunc_log10f_finite: if (!APF.isNegative() && !APF.isZero() && TLI->has(Func)) // TODO: What about hosts that lack a C99 library? 
return ConstantFoldFP(log10, APF, Ty); break; case LibFunc_logl: return nullptr; case LibFunc_nearbyint: case LibFunc_nearbyintf: case LibFunc_rint: case LibFunc_rintf: if (TLI->has(Func)) { U.roundToIntegral(APFloat::rmNearestTiesToEven); return ConstantFP::get(Ty->getContext(), U); } break; case LibFunc_round: case LibFunc_roundf: if (TLI->has(Func)) { U.roundToIntegral(APFloat::rmNearestTiesToAway); return ConstantFP::get(Ty->getContext(), U); } break; case LibFunc_sin: case LibFunc_sinf: if (TLI->has(Func)) return ConstantFoldFP(sin, APF, Ty); break; case LibFunc_sinh: case LibFunc_sinhf: case LibFunc_sinh_finite: case LibFunc_sinhf_finite: if (TLI->has(Func)) return ConstantFoldFP(sinh, APF, Ty); break; case LibFunc_sqrt: case LibFunc_sqrtf: if (!APF.isNegative() && TLI->has(Func)) return ConstantFoldFP(sqrt, APF, Ty); break; case LibFunc_tan: case LibFunc_tanf: if (TLI->has(Func)) return ConstantFoldFP(tan, APF, Ty); break; case LibFunc_tanh: case LibFunc_tanhf: if (TLI->has(Func)) return ConstantFoldFP(tanh, APF, Ty); break; case LibFunc_trunc: case LibFunc_truncf: if (TLI->has(Func)) { U.roundToIntegral(APFloat::rmTowardZero); return ConstantFP::get(Ty->getContext(), U); } break; } return nullptr; } if (auto *Op = dyn_cast(Operands[0])) { switch (IntrinsicID) { case Intrinsic::bswap: return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap()); case Intrinsic::ctpop: return ConstantInt::get(Ty, Op->getValue().popcount()); case Intrinsic::bitreverse: return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits()); case Intrinsic::convert_from_fp16: { APFloat Val(APFloat::IEEEhalf(), Op->getValue()); bool lost = false; APFloat::opStatus status = Val.convert( Ty->getFltSemantics(), APFloat::rmNearestTiesToEven, &lost); // Conversion is always precise. (void)status; assert(status != APFloat::opInexact && !lost && "Precision lost during fp16 constfolding"); return ConstantFP::get(Ty->getContext(), Val); } case Intrinsic::amdgcn_s_wqm: { uint64_t Val = Op->getZExtValue(); Val |= (Val & 0x5555555555555555ULL) << 1 | ((Val >> 1) & 0x5555555555555555ULL); Val |= (Val & 0x3333333333333333ULL) << 2 | ((Val >> 2) & 0x3333333333333333ULL); return ConstantInt::get(Ty, Val); } case Intrinsic::amdgcn_s_quadmask: { uint64_t Val = Op->getZExtValue(); uint64_t QuadMask = 0; for (unsigned I = 0; I < Op->getBitWidth() / 4; ++I, Val >>= 4) { if (!(Val & 0xF)) continue; QuadMask |= (1ULL << I); } return ConstantInt::get(Ty, QuadMask); } case Intrinsic::amdgcn_s_bitreplicate: { uint64_t Val = Op->getZExtValue(); Val = (Val & 0x000000000000FFFFULL) | (Val & 0x00000000FFFF0000ULL) << 16; Val = (Val & 0x000000FF000000FFULL) | (Val & 0x0000FF000000FF00ULL) << 8; Val = (Val & 0x000F000F000F000FULL) | (Val & 0x00F000F000F000F0ULL) << 4; Val = (Val & 0x0303030303030303ULL) | (Val & 0x0C0C0C0C0C0C0C0CULL) << 2; Val = (Val & 0x1111111111111111ULL) | (Val & 0x2222222222222222ULL) << 1; Val = Val | Val << 1; return ConstantInt::get(Ty, Val); } default: return nullptr; } } switch (IntrinsicID) { default: break; case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_mul: case Intrinsic::vector_reduce_and: case Intrinsic::vector_reduce_or: case Intrinsic::vector_reduce_xor: case Intrinsic::vector_reduce_smin: case Intrinsic::vector_reduce_smax: case Intrinsic::vector_reduce_umin: case Intrinsic::vector_reduce_umax: if (Constant *C = constantFoldVectorReduce(IntrinsicID, Operands[0])) return C; break; } // Support ConstantVector in case we have an Undef in the top. 
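// Editorial note: the SSE/SSE2 scalar conversions handled below take their source as
// a <4 x float> or <2 x double> vector and only read lane 0, hence the
// getAggregateElement(0U) extraction.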
if (isa(Operands[0]) || isa(Operands[0])) { auto *Op = cast(Operands[0]); switch (IntrinsicID) { default: break; case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: case Intrinsic::x86_sse2_cvtsd2si: case Intrinsic::x86_sse2_cvtsd2si64: if (ConstantFP *FPOp = dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/false, Ty, /*IsSigned*/true); break; case Intrinsic::x86_sse_cvttss2si: case Intrinsic::x86_sse_cvttss2si64: case Intrinsic::x86_sse2_cvttsd2si: case Intrinsic::x86_sse2_cvttsd2si64: if (ConstantFP *FPOp = dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/true, Ty, /*IsSigned*/true); break; } } return nullptr; } static Constant *evaluateCompare(const APFloat &Op1, const APFloat &Op2, const ConstrainedFPIntrinsic *Call) { APFloat::opStatus St = APFloat::opOK; auto *FCmp = cast(Call); FCmpInst::Predicate Cond = FCmp->getPredicate(); if (FCmp->isSignaling()) { if (Op1.isNaN() || Op2.isNaN()) St = APFloat::opInvalidOp; } else { if (Op1.isSignaling() || Op2.isSignaling()) St = APFloat::opInvalidOp; } bool Result = FCmpInst::compare(Op1, Op2, Cond); if (mayFoldConstrained(const_cast(FCmp), St)) return ConstantInt::get(Call->getType()->getScalarType(), Result); return nullptr; } static Constant *ConstantFoldLibCall2(StringRef Name, Type *Ty, ArrayRef Operands, const TargetLibraryInfo *TLI) { if (!TLI) return nullptr; LibFunc Func = NotLibFunc; if (!TLI->getLibFunc(Name, Func)) return nullptr; const auto *Op1 = dyn_cast(Operands[0]); if (!Op1) return nullptr; const auto *Op2 = dyn_cast(Operands[1]); if (!Op2) return nullptr; const APFloat &Op1V = Op1->getValueAPF(); const APFloat &Op2V = Op2->getValueAPF(); switch (Func) { default: break; case LibFunc_pow: case LibFunc_powf: case LibFunc_pow_finite: case LibFunc_powf_finite: if (TLI->has(Func)) return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); break; case LibFunc_fmod: case LibFunc_fmodf: if (TLI->has(Func)) { APFloat V = Op1->getValueAPF(); if (APFloat::opStatus::opOK == V.mod(Op2->getValueAPF())) return ConstantFP::get(Ty->getContext(), V); } break; case LibFunc_remainder: case LibFunc_remainderf: if (TLI->has(Func)) { APFloat V = Op1->getValueAPF(); if (APFloat::opStatus::opOK == V.remainder(Op2->getValueAPF())) return ConstantFP::get(Ty->getContext(), V); } break; case LibFunc_atan2: case LibFunc_atan2f: // atan2(+/-0.0, +/-0.0) is known to raise an exception on some libm // (Solaris), so we do not assume a known result for that. if (Op1V.isZero() && Op2V.isZero()) return nullptr; [[fallthrough]]; case LibFunc_atan2_finite: case LibFunc_atan2f_finite: if (TLI->has(Func)) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); break; } return nullptr; } static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty, ArrayRef Operands, const CallBase *Call) { assert(Operands.size() == 2 && "Wrong number of operands."); if (Ty->isFloatingPointTy()) { // TODO: We should have undef handling for all of the FP intrinsics that // are attempted to be folded in this function. bool IsOp0Undef = isa(Operands[0]); bool IsOp1Undef = isa(Operands[1]); switch (IntrinsicID) { case Intrinsic::maxnum: case Intrinsic::minnum: case Intrinsic::maximum: case Intrinsic::minimum: // If one argument is undef, return the other argument. 
if (IsOp0Undef) return Operands[1]; if (IsOp1Undef) return Operands[0]; break; } } if (const auto *Op1 = dyn_cast(Operands[0])) { const APFloat &Op1V = Op1->getValueAPF(); if (const auto *Op2 = dyn_cast(Operands[1])) { if (Op2->getType() != Op1->getType()) return nullptr; const APFloat &Op2V = Op2->getValueAPF(); if (const auto *ConstrIntr = dyn_cast_if_present(Call)) { RoundingMode RM = getEvaluationRoundingMode(ConstrIntr); APFloat Res = Op1V; APFloat::opStatus St; switch (IntrinsicID) { default: return nullptr; case Intrinsic::experimental_constrained_fadd: St = Res.add(Op2V, RM); break; case Intrinsic::experimental_constrained_fsub: St = Res.subtract(Op2V, RM); break; case Intrinsic::experimental_constrained_fmul: St = Res.multiply(Op2V, RM); break; case Intrinsic::experimental_constrained_fdiv: St = Res.divide(Op2V, RM); break; case Intrinsic::experimental_constrained_frem: St = Res.mod(Op2V); break; case Intrinsic::experimental_constrained_fcmp: case Intrinsic::experimental_constrained_fcmps: return evaluateCompare(Op1V, Op2V, ConstrIntr); } if (mayFoldConstrained(const_cast(ConstrIntr), St)) return ConstantFP::get(Ty->getContext(), Res); return nullptr; } switch (IntrinsicID) { default: break; case Intrinsic::copysign: return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V)); case Intrinsic::minnum: return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V)); case Intrinsic::maxnum: return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V)); case Intrinsic::minimum: return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V)); case Intrinsic::maximum: return ConstantFP::get(Ty->getContext(), maximum(Op1V, Op2V)); } if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return nullptr; switch (IntrinsicID) { default: break; case Intrinsic::pow: return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); case Intrinsic::amdgcn_fmul_legacy: // The legacy behaviour is that multiplying +/- 0.0 by anything, even // NaN or infinity, gives +0.0. 
if (Op1V.isZero() || Op2V.isZero()) return ConstantFP::getZero(Ty); return ConstantFP::get(Ty->getContext(), Op1V * Op2V); } } else if (auto *Op2C = dyn_cast(Operands[1])) { switch (IntrinsicID) { case Intrinsic::ldexp: { return ConstantFP::get( Ty->getContext(), scalbn(Op1V, Op2C->getSExtValue(), APFloat::rmNearestTiesToEven)); } case Intrinsic::is_fpclass: { FPClassTest Mask = static_cast(Op2C->getZExtValue()); bool Result = ((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) || ((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) || ((Mask & fcNegInf) && Op1V.isNegInfinity()) || ((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) || ((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) || ((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) || ((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) || ((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) || ((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) || ((Mask & fcPosInf) && Op1V.isPosInfinity()); return ConstantInt::get(Ty, Result); } case Intrinsic::powi: { int Exp = static_cast(Op2C->getSExtValue()); switch (Ty->getTypeID()) { case Type::HalfTyID: case Type::FloatTyID: { APFloat Res(static_cast(std::pow(Op1V.convertToFloat(), Exp))); if (Ty->isHalfTy()) { bool Unused; Res.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Unused); } return ConstantFP::get(Ty->getContext(), Res); } case Type::DoubleTyID: return ConstantFP::get(Ty, std::pow(Op1V.convertToDouble(), Exp)); default: return nullptr; } } default: break; } } return nullptr; } if (Operands[0]->getType()->isIntegerTy() && Operands[1]->getType()->isIntegerTy()) { const APInt *C0, *C1; if (!getConstIntOrUndef(Operands[0], C0) || !getConstIntOrUndef(Operands[1], C1)) return nullptr; switch (IntrinsicID) { default: break; case Intrinsic::smax: case Intrinsic::smin: case Intrinsic::umax: case Intrinsic::umin: // This is the same as for binary ops - poison propagates. // TODO: Poison handling should be consolidated. if (isa(Operands[0]) || isa(Operands[1])) return PoisonValue::get(Ty); if (!C0 && !C1) return UndefValue::get(Ty); if (!C0 || !C1) return MinMaxIntrinsic::getSaturationPoint(IntrinsicID, Ty); return ConstantInt::get( Ty, ICmpInst::compare(*C0, *C1, MinMaxIntrinsic::getPredicate(IntrinsicID)) ? *C0 : *C1); case Intrinsic::scmp: case Intrinsic::ucmp: if (isa(Operands[0]) || isa(Operands[1])) return PoisonValue::get(Ty); if (!C0 || !C1) return ConstantInt::get(Ty, 0); int Res; if (IntrinsicID == Intrinsic::scmp) Res = C0->sgt(*C1) ? 1 : C0->slt(*C1) ? -1 : 0; else Res = C0->ugt(*C1) ? 1 : C0->ult(*C1) ? 
-1 : 0; return ConstantInt::get(Ty, Res, /*IsSigned=*/true); case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: // X - undef -> { 0, false } // undef - X -> { 0, false } if (!C0 || !C1) return Constant::getNullValue(Ty); [[fallthrough]]; case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: // X + undef -> { -1, false } // undef + x -> { -1, false } if (!C0 || !C1) { return ConstantStruct::get( cast(Ty), {Constant::getAllOnesValue(Ty->getStructElementType(0)), Constant::getNullValue(Ty->getStructElementType(1))}); } [[fallthrough]]; case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: { // undef * X -> { 0, false } // X * undef -> { 0, false } if (!C0 || !C1) return Constant::getNullValue(Ty); APInt Res; bool Overflow; switch (IntrinsicID) { default: llvm_unreachable("Invalid case"); case Intrinsic::sadd_with_overflow: Res = C0->sadd_ov(*C1, Overflow); break; case Intrinsic::uadd_with_overflow: Res = C0->uadd_ov(*C1, Overflow); break; case Intrinsic::ssub_with_overflow: Res = C0->ssub_ov(*C1, Overflow); break; case Intrinsic::usub_with_overflow: Res = C0->usub_ov(*C1, Overflow); break; case Intrinsic::smul_with_overflow: Res = C0->smul_ov(*C1, Overflow); break; case Intrinsic::umul_with_overflow: Res = C0->umul_ov(*C1, Overflow); break; } Constant *Ops[] = { ConstantInt::get(Ty->getContext(), Res), ConstantInt::get(Type::getInt1Ty(Ty->getContext()), Overflow) }; return ConstantStruct::get(cast(Ty), Ops); } case Intrinsic::uadd_sat: case Intrinsic::sadd_sat: // This is the same as for binary ops - poison propagates. // TODO: Poison handling should be consolidated. if (isa(Operands[0]) || isa(Operands[1])) return PoisonValue::get(Ty); if (!C0 && !C1) return UndefValue::get(Ty); if (!C0 || !C1) return Constant::getAllOnesValue(Ty); if (IntrinsicID == Intrinsic::uadd_sat) return ConstantInt::get(Ty, C0->uadd_sat(*C1)); else return ConstantInt::get(Ty, C0->sadd_sat(*C1)); case Intrinsic::usub_sat: case Intrinsic::ssub_sat: // This is the same as for binary ops - poison propagates. // TODO: Poison handling should be consolidated. if (isa(Operands[0]) || isa(Operands[1])) return PoisonValue::get(Ty); if (!C0 && !C1) return UndefValue::get(Ty); if (!C0 || !C1) return Constant::getNullValue(Ty); if (IntrinsicID == Intrinsic::usub_sat) return ConstantInt::get(Ty, C0->usub_sat(*C1)); else return ConstantInt::get(Ty, C0->ssub_sat(*C1)); case Intrinsic::cttz: case Intrinsic::ctlz: assert(C1 && "Must be constant int"); // cttz(0, 1) and ctlz(0, 1) are poison. if (C1->isOne() && (!C0 || C0->isZero())) return PoisonValue::get(Ty); if (!C0) return Constant::getNullValue(Ty); if (IntrinsicID == Intrinsic::cttz) return ConstantInt::get(Ty, C0->countr_zero()); else return ConstantInt::get(Ty, C0->countl_zero()); case Intrinsic::abs: assert(C1 && "Must be constant int"); assert((C1->isOne() || C1->isZero()) && "Must be 0 or 1"); // Undef or minimum val operand with poison min --> undef if (C1->isOne() && (!C0 || C0->isMinSignedValue())) return UndefValue::get(Ty); // Undef operand with no poison min --> 0 (sign bit must be clear) if (!C0) return Constant::getNullValue(Ty); return ConstantInt::get(Ty, C0->abs()); case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: return dyn_cast(Operands[0]); } return nullptr; } // Support ConstantVector in case we have an Undef in the top. if ((isa(Operands[0]) || isa(Operands[0])) && // Check for default rounding mode. // FIXME: Support other rounding modes? 
isa(Operands[1]) && cast(Operands[1])->getValue() == 4) { auto *Op = cast(Operands[0]); switch (IntrinsicID) { default: break; case Intrinsic::x86_avx512_vcvtss2si32: case Intrinsic::x86_avx512_vcvtss2si64: case Intrinsic::x86_avx512_vcvtsd2si32: case Intrinsic::x86_avx512_vcvtsd2si64: if (ConstantFP *FPOp = dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/false, Ty, /*IsSigned*/true); break; case Intrinsic::x86_avx512_vcvtss2usi32: case Intrinsic::x86_avx512_vcvtss2usi64: case Intrinsic::x86_avx512_vcvtsd2usi32: case Intrinsic::x86_avx512_vcvtsd2usi64: if (ConstantFP *FPOp = dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/false, Ty, /*IsSigned*/false); break; case Intrinsic::x86_avx512_cvttss2si: case Intrinsic::x86_avx512_cvttss2si64: case Intrinsic::x86_avx512_cvttsd2si: case Intrinsic::x86_avx512_cvttsd2si64: if (ConstantFP *FPOp = dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/true, Ty, /*IsSigned*/true); break; case Intrinsic::x86_avx512_cvttss2usi: case Intrinsic::x86_avx512_cvttss2usi64: case Intrinsic::x86_avx512_cvttsd2usi: case Intrinsic::x86_avx512_cvttsd2usi64: if (ConstantFP *FPOp = dyn_cast_or_null(Op->getAggregateElement(0U))) return ConstantFoldSSEConvertToInt(FPOp->getValueAPF(), /*roundTowardZero=*/true, Ty, /*IsSigned*/false); break; } } return nullptr; } static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID, const APFloat &S0, const APFloat &S1, const APFloat &S2) { unsigned ID; const fltSemantics &Sem = S0.getSemantics(); APFloat MA(Sem), SC(Sem), TC(Sem); if (abs(S2) >= abs(S0) && abs(S2) >= abs(S1)) { if (S2.isNegative() && S2.isNonZero() && !S2.isNaN()) { // S2 < 0 ID = 5; SC = -S0; } else { ID = 4; SC = S0; } MA = S2; TC = -S1; } else if (abs(S1) >= abs(S0)) { if (S1.isNegative() && S1.isNonZero() && !S1.isNaN()) { // S1 < 0 ID = 3; TC = -S2; } else { ID = 2; TC = S2; } MA = S1; SC = S0; } else { if (S0.isNegative() && S0.isNonZero() && !S0.isNaN()) { // S0 < 0 ID = 1; SC = S2; } else { ID = 0; SC = -S2; } MA = S0; TC = -S1; } switch (IntrinsicID) { default: llvm_unreachable("unhandled amdgcn cube intrinsic"); case Intrinsic::amdgcn_cubeid: return APFloat(Sem, ID); case Intrinsic::amdgcn_cubema: return MA + MA; case Intrinsic::amdgcn_cubesc: return SC; case Intrinsic::amdgcn_cubetc: return TC; } } static Constant *ConstantFoldAMDGCNPermIntrinsic(ArrayRef Operands, Type *Ty) { const APInt *C0, *C1, *C2; if (!getConstIntOrUndef(Operands[0], C0) || !getConstIntOrUndef(Operands[1], C1) || !getConstIntOrUndef(Operands[2], C2)) return nullptr; if (!C2) return UndefValue::get(Ty); APInt Val(32, 0); unsigned NumUndefBytes = 0; for (unsigned I = 0; I < 32; I += 8) { unsigned Sel = C2->extractBitsAsZExtValue(8, I); unsigned B = 0; if (Sel >= 13) B = 0xff; else if (Sel == 12) B = 0x00; else { const APInt *Src = ((Sel & 10) == 10 || (Sel & 12) == 4) ? C0 : C1; if (!Src) ++NumUndefBytes; else if (Sel < 8) B = Src->extractBitsAsZExtValue(8, (Sel & 3) * 8); else B = Src->extractBitsAsZExtValue(1, (Sel & 1) ? 
31 : 15) * 0xff; } Val.insertBits(B, I, 8); } if (NumUndefBytes == 4) return UndefValue::get(Ty); return ConstantInt::get(Ty, Val); } static Constant *ConstantFoldScalarCall3(StringRef Name, Intrinsic::ID IntrinsicID, Type *Ty, ArrayRef Operands, const TargetLibraryInfo *TLI, const CallBase *Call) { assert(Operands.size() == 3 && "Wrong number of operands."); if (const auto *Op1 = dyn_cast(Operands[0])) { if (const auto *Op2 = dyn_cast(Operands[1])) { if (const auto *Op3 = dyn_cast(Operands[2])) { const APFloat &C1 = Op1->getValueAPF(); const APFloat &C2 = Op2->getValueAPF(); const APFloat &C3 = Op3->getValueAPF(); if (const auto *ConstrIntr = dyn_cast(Call)) { RoundingMode RM = getEvaluationRoundingMode(ConstrIntr); APFloat Res = C1; APFloat::opStatus St; switch (IntrinsicID) { default: return nullptr; case Intrinsic::experimental_constrained_fma: case Intrinsic::experimental_constrained_fmuladd: St = Res.fusedMultiplyAdd(C2, C3, RM); break; } if (mayFoldConstrained( const_cast(ConstrIntr), St)) return ConstantFP::get(Ty->getContext(), Res); return nullptr; } switch (IntrinsicID) { default: break; case Intrinsic::amdgcn_fma_legacy: { // The legacy behaviour is that multiplying +/- 0.0 by anything, even // NaN or infinity, gives +0.0. if (C1.isZero() || C2.isZero()) { // It's tempting to just return C3 here, but that would give the // wrong result if C3 was -0.0. return ConstantFP::get(Ty->getContext(), APFloat(0.0f) + C3); } [[fallthrough]]; } case Intrinsic::fma: case Intrinsic::fmuladd: { APFloat V = C1; V.fusedMultiplyAdd(C2, C3, APFloat::rmNearestTiesToEven); return ConstantFP::get(Ty->getContext(), V); } case Intrinsic::amdgcn_cubeid: case Intrinsic::amdgcn_cubema: case Intrinsic::amdgcn_cubesc: case Intrinsic::amdgcn_cubetc: { APFloat V = ConstantFoldAMDGCNCubeIntrinsic(IntrinsicID, C1, C2, C3); return ConstantFP::get(Ty->getContext(), V); } } } } } if (IntrinsicID == Intrinsic::smul_fix || IntrinsicID == Intrinsic::smul_fix_sat) { // poison * C -> poison // C * poison -> poison if (isa(Operands[0]) || isa(Operands[1])) return PoisonValue::get(Ty); const APInt *C0, *C1; if (!getConstIntOrUndef(Operands[0], C0) || !getConstIntOrUndef(Operands[1], C1)) return nullptr; // undef * C -> 0 // C * undef -> 0 if (!C0 || !C1) return Constant::getNullValue(Ty); // This code performs rounding towards negative infinity in case the result // cannot be represented exactly for the given scale. Targets that do care // about rounding should use a target hook for specifying how rounding // should be done, and provide their own folding to be consistent with // rounding. This is the same approach as used by // DAGTypeLegalizer::ExpandIntRes_MULFIX. 
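// Illustrative sketch (editor's aside, not part of the patch): the rounding
// policy described in the comment above, shown with plain 64-bit integers
// standing in for the double-width APInt product. An arithmetic right shift
// rounds toward negative infinity, so e.g. smul.fix(-3, 1, scale=1) folds to
// -2 rather than the -1 that truncation toward zero would give. Assumes
// arithmetic right shift of negative values (guaranteed since C++20,
// universal in practice) and a scale below the operand width.
#include <cassert>
#include <cstdint>

static int32_t smulFixRef(int32_t A, int32_t B, unsigned Scale) {
  assert(Scale < 32 && "Illegal scale.");
  int64_t Product = int64_t(A) * int64_t(B); // double-width, cannot overflow
  return static_cast<int32_t>(Product >> Scale); // rounds toward -inf
}

static void smulFixExamples() {
  assert(smulFixRef(-3, 1, 1) == -2); // exact -1.5 rounds down to -2
  assert(smulFixRef(3, 1, 1) == 1);   // exact 1.5 floors to 1
}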
unsigned Scale = cast(Operands[2])->getZExtValue(); unsigned Width = C0->getBitWidth(); assert(Scale < Width && "Illegal scale."); unsigned ExtendedWidth = Width * 2; APInt Product = (C0->sext(ExtendedWidth) * C1->sext(ExtendedWidth)).ashr(Scale); if (IntrinsicID == Intrinsic::smul_fix_sat) { APInt Max = APInt::getSignedMaxValue(Width).sext(ExtendedWidth); APInt Min = APInt::getSignedMinValue(Width).sext(ExtendedWidth); Product = APIntOps::smin(Product, Max); Product = APIntOps::smax(Product, Min); } return ConstantInt::get(Ty->getContext(), Product.sextOrTrunc(Width)); } if (IntrinsicID == Intrinsic::fshl || IntrinsicID == Intrinsic::fshr) { const APInt *C0, *C1, *C2; if (!getConstIntOrUndef(Operands[0], C0) || !getConstIntOrUndef(Operands[1], C1) || !getConstIntOrUndef(Operands[2], C2)) return nullptr; bool IsRight = IntrinsicID == Intrinsic::fshr; if (!C2) return Operands[IsRight ? 1 : 0]; if (!C0 && !C1) return UndefValue::get(Ty); // The shift amount is interpreted as modulo the bitwidth. If the shift // amount is effectively 0, avoid UB due to oversized inverse shift below. unsigned BitWidth = C2->getBitWidth(); unsigned ShAmt = C2->urem(BitWidth); if (!ShAmt) return Operands[IsRight ? 1 : 0]; // (C0 << ShlAmt) | (C1 >> LshrAmt) unsigned LshrAmt = IsRight ? ShAmt : BitWidth - ShAmt; unsigned ShlAmt = !IsRight ? ShAmt : BitWidth - ShAmt; if (!C0) return ConstantInt::get(Ty, C1->lshr(LshrAmt)); if (!C1) return ConstantInt::get(Ty, C0->shl(ShlAmt)); return ConstantInt::get(Ty, C0->shl(ShlAmt) | C1->lshr(LshrAmt)); } if (IntrinsicID == Intrinsic::amdgcn_perm) return ConstantFoldAMDGCNPermIntrinsic(Operands, Ty); return nullptr; } static Constant *ConstantFoldScalarCall(StringRef Name, Intrinsic::ID IntrinsicID, Type *Ty, ArrayRef Operands, const TargetLibraryInfo *TLI, const CallBase *Call) { if (Operands.size() == 1) return ConstantFoldScalarCall1(Name, IntrinsicID, Ty, Operands, TLI, Call); if (Operands.size() == 2) { if (Constant *FoldedLibCall = ConstantFoldLibCall2(Name, Ty, Operands, TLI)) { return FoldedLibCall; } return ConstantFoldIntrinsicCall2(IntrinsicID, Ty, Operands, Call); } if (Operands.size() == 3) return ConstantFoldScalarCall3(Name, IntrinsicID, Ty, Operands, TLI, Call); return nullptr; } static Constant *ConstantFoldFixedVectorCall( StringRef Name, Intrinsic::ID IntrinsicID, FixedVectorType *FVTy, ArrayRef Operands, const DataLayout &DL, const TargetLibraryInfo *TLI, const CallBase *Call) { SmallVector Result(FVTy->getNumElements()); SmallVector Lane(Operands.size()); Type *Ty = FVTy->getElementType(); switch (IntrinsicID) { case Intrinsic::masked_load: { auto *SrcPtr = Operands[0]; auto *Mask = Operands[2]; auto *Passthru = Operands[3]; Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, FVTy, DL); SmallVector NewElements; for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) { auto *MaskElt = Mask->getAggregateElement(I); if (!MaskElt) break; auto *PassthruElt = Passthru->getAggregateElement(I); auto *VecElt = VecData ? 
VecData->getAggregateElement(I) : nullptr; if (isa(MaskElt)) { if (PassthruElt) NewElements.push_back(PassthruElt); else if (VecElt) NewElements.push_back(VecElt); else return nullptr; } if (MaskElt->isNullValue()) { if (!PassthruElt) return nullptr; NewElements.push_back(PassthruElt); } else if (MaskElt->isOneValue()) { if (!VecElt) return nullptr; NewElements.push_back(VecElt); } else { return nullptr; } } if (NewElements.size() != FVTy->getNumElements()) return nullptr; return ConstantVector::get(NewElements); } case Intrinsic::arm_mve_vctp8: case Intrinsic::arm_mve_vctp16: case Intrinsic::arm_mve_vctp32: case Intrinsic::arm_mve_vctp64: { if (auto *Op = dyn_cast(Operands[0])) { unsigned Lanes = FVTy->getNumElements(); uint64_t Limit = Op->getZExtValue(); SmallVector NCs; for (unsigned i = 0; i < Lanes; i++) { if (i < Limit) NCs.push_back(ConstantInt::getTrue(Ty)); else NCs.push_back(ConstantInt::getFalse(Ty)); } return ConstantVector::get(NCs); } return nullptr; } case Intrinsic::get_active_lane_mask: { auto *Op0 = dyn_cast(Operands[0]); auto *Op1 = dyn_cast(Operands[1]); if (Op0 && Op1) { unsigned Lanes = FVTy->getNumElements(); uint64_t Base = Op0->getZExtValue(); uint64_t Limit = Op1->getZExtValue(); SmallVector NCs; for (unsigned i = 0; i < Lanes; i++) { if (Base + i < Limit) NCs.push_back(ConstantInt::getTrue(Ty)); else NCs.push_back(ConstantInt::getFalse(Ty)); } return ConstantVector::get(NCs); } return nullptr; } default: break; } for (unsigned I = 0, E = FVTy->getNumElements(); I != E; ++I) { // Gather a column of constants. for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { // Some intrinsics use a scalar type for certain arguments. if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J)) { Lane[J] = Operands[J]; continue; } Constant *Agg = Operands[J]->getAggregateElement(I); if (!Agg) return nullptr; Lane[J] = Agg; } // Use the regular scalar folding to simplify this column. Constant *Folded = ConstantFoldScalarCall(Name, IntrinsicID, Ty, Lane, TLI, Call); if (!Folded) return nullptr; Result[I] = Folded; } return ConstantVector::get(Result); } static Constant *ConstantFoldScalableVectorCall( StringRef Name, Intrinsic::ID IntrinsicID, ScalableVectorType *SVTy, ArrayRef Operands, const DataLayout &DL, const TargetLibraryInfo *TLI, const CallBase *Call) { switch (IntrinsicID) { case Intrinsic::aarch64_sve_convert_from_svbool: { auto *Src = dyn_cast(Operands[0]); if (!Src || !Src->isNullValue()) break; return ConstantInt::getFalse(SVTy); } default: break; } return nullptr; } static std::pair ConstantFoldScalarFrexpCall(Constant *Op, Type *IntTy) { if (isa(Op)) return {Op, PoisonValue::get(IntTy)}; auto *ConstFP = dyn_cast(Op); if (!ConstFP) return {}; const APFloat &U = ConstFP->getValueAPF(); int FrexpExp; APFloat FrexpMant = frexp(U, FrexpExp, APFloat::rmNearestTiesToEven); Constant *Result0 = ConstantFP::get(ConstFP->getType(), FrexpMant); // The exponent is an "unspecified value" for inf/nan. We use zero to avoid // using undef. Constant *Result1 = FrexpMant.isFinite() ? ConstantInt::get(IntTy, FrexpExp) : ConstantInt::getNullValue(IntTy); return {Result0, Result1}; } /// Handle intrinsics that return tuples, which may be tuples of vectors. 
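// Illustrative sketch (editor's aside, not part of the patch): the frexp
// contract the scalar helper above relies on, demonstrated with the host
// libm. For finite non-zero X, X == Mant * 2^Exp with |Mant| in [0.5, 1);
// for inf/nan the exponent is an unspecified value, which is why the folder
// pins it to zero instead of producing undef.
#include <cassert>
#include <cmath>

static void frexpExamples() {
  int Exp = 0;
  double Mant = std::frexp(8.0, &Exp);
  assert(Mant == 0.5 && Exp == 4);   // 8.0 == 0.5 * 2^4
  Mant = std::frexp(-0.75, &Exp);
  assert(Mant == -0.75 && Exp == 0); // already in [-1.0, -0.5)
  (void)Mant;
  (void)Exp;
}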
static Constant * ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID, StructType *StTy, ArrayRef Operands, const DataLayout &DL, const TargetLibraryInfo *TLI, const CallBase *Call) { switch (IntrinsicID) { case Intrinsic::frexp: { Type *Ty0 = StTy->getContainedType(0); Type *Ty1 = StTy->getContainedType(1)->getScalarType(); if (auto *FVTy0 = dyn_cast(Ty0)) { SmallVector Results0(FVTy0->getNumElements()); SmallVector Results1(FVTy0->getNumElements()); for (unsigned I = 0, E = FVTy0->getNumElements(); I != E; ++I) { Constant *Lane = Operands[0]->getAggregateElement(I); std::tie(Results0[I], Results1[I]) = ConstantFoldScalarFrexpCall(Lane, Ty1); if (!Results0[I]) return nullptr; } return ConstantStruct::get(StTy, ConstantVector::get(Results0), ConstantVector::get(Results1)); } auto [Result0, Result1] = ConstantFoldScalarFrexpCall(Operands[0], Ty1); if (!Result0) return nullptr; return ConstantStruct::get(StTy, Result0, Result1); } default: // TODO: Constant folding of vector intrinsics that fall through here does // not work (e.g. overflow intrinsics) return ConstantFoldScalarCall(Name, IntrinsicID, StTy, Operands, TLI, Call); } return nullptr; } } // end anonymous namespace Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS, Constant *RHS, Type *Ty, Instruction *FMFSource) { return ConstantFoldIntrinsicCall2(ID, Ty, {LHS, RHS}, dyn_cast_if_present(FMFSource)); } Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F, ArrayRef Operands, const TargetLibraryInfo *TLI, bool AllowNonDeterministic) { if (Call->isNoBuiltin()) return nullptr; if (!F->hasName()) return nullptr; // If this is not an intrinsic and not recognized as a library call, bail out. Intrinsic::ID IID = F->getIntrinsicID(); if (IID == Intrinsic::not_intrinsic) { if (!TLI) return nullptr; LibFunc LibF; if (!TLI->getLibFunc(*F, LibF)) return nullptr; } // Conservatively assume that floating-point libcalls may be // non-deterministic. Type *Ty = F->getReturnType(); if (!AllowNonDeterministic && Ty->isFPOrFPVectorTy()) return nullptr; StringRef Name = F->getName(); if (auto *FVTy = dyn_cast(Ty)) return ConstantFoldFixedVectorCall( Name, IID, FVTy, Operands, F->getDataLayout(), TLI, Call); if (auto *SVTy = dyn_cast(Ty)) return ConstantFoldScalableVectorCall( Name, IID, SVTy, Operands, F->getDataLayout(), TLI, Call); if (auto *StTy = dyn_cast(Ty)) return ConstantFoldStructCall(Name, IID, StTy, Operands, F->getDataLayout(), TLI, Call); // TODO: If this is a library function, we already discovered that above, // so we should pass the LibFunc, not the name (and it might be better // still to separate intrinsic handling from libcalls). return ConstantFoldScalarCall(Name, IID, Ty, Operands, TLI, Call); } bool llvm::isMathLibCallNoop(const CallBase *Call, const TargetLibraryInfo *TLI) { // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap // (and to some extent ConstantFoldScalarCall). 
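// Illustrative sketch (editor's aside, not part of the patch): the shape of
// the single-argument domain checks performed below, in plain C++. A libm
// call is only treated as a removable no-op if, for the given constant
// argument, it cannot set errno or raise a floating-point exception: the log
// family is safe for NaN and strictly positive inputs but not for zero or
// negatives, and exp is kept inside a conservative range that cannot
// overflow or underflow to a range error.
#include <cmath>

static bool logFamilyIsNoop(double Op) {
  // Mirrors: Op.isNaN() || (!Op.isZero() && !Op.isNegative())
  return std::isnan(Op) || (Op != 0.0 && !std::signbit(Op));
}

static bool expIsNoopDouble(double Op) {
  // Mirrors the conservative double-precision bounds used below; NaN compares
  // false on both sides and is therefore accepted (exp(NaN) is NaN, no errno).
  return !(Op < -745.0 || Op > 709.0);
}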
if (Call->isNoBuiltin() || Call->isStrictFP()) return false; Function *F = Call->getCalledFunction(); if (!F) return false; LibFunc Func; if (!TLI || !TLI->getLibFunc(*F, Func)) return false; if (Call->arg_size() == 1) { if (ConstantFP *OpC = dyn_cast(Call->getArgOperand(0))) { const APFloat &Op = OpC->getValueAPF(); switch (Func) { case LibFunc_logl: case LibFunc_log: case LibFunc_logf: case LibFunc_log2l: case LibFunc_log2: case LibFunc_log2f: case LibFunc_log10l: case LibFunc_log10: case LibFunc_log10f: return Op.isNaN() || (!Op.isZero() && !Op.isNegative()); case LibFunc_expl: case LibFunc_exp: case LibFunc_expf: // FIXME: These boundaries are slightly conservative. if (OpC->getType()->isDoubleTy()) return !(Op < APFloat(-745.0) || Op > APFloat(709.0)); if (OpC->getType()->isFloatTy()) return !(Op < APFloat(-103.0f) || Op > APFloat(88.0f)); break; case LibFunc_exp2l: case LibFunc_exp2: case LibFunc_exp2f: // FIXME: These boundaries are slightly conservative. if (OpC->getType()->isDoubleTy()) return !(Op < APFloat(-1074.0) || Op > APFloat(1023.0)); if (OpC->getType()->isFloatTy()) return !(Op < APFloat(-149.0f) || Op > APFloat(127.0f)); break; case LibFunc_sinl: case LibFunc_sin: case LibFunc_sinf: case LibFunc_cosl: case LibFunc_cos: case LibFunc_cosf: return !Op.isInfinity(); case LibFunc_tanl: case LibFunc_tan: case LibFunc_tanf: { // FIXME: Stop using the host math library. // FIXME: The computation isn't done in the right precision. Type *Ty = OpC->getType(); if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) return ConstantFoldFP(tan, OpC->getValueAPF(), Ty) != nullptr; break; } case LibFunc_atan: case LibFunc_atanf: case LibFunc_atanl: // Per POSIX, this MAY fail if Op is denormal. We choose not failing. return true; case LibFunc_asinl: case LibFunc_asin: case LibFunc_asinf: case LibFunc_acosl: case LibFunc_acos: case LibFunc_acosf: return !(Op < APFloat(Op.getSemantics(), "-1") || Op > APFloat(Op.getSemantics(), "1")); case LibFunc_sinh: case LibFunc_cosh: case LibFunc_sinhf: case LibFunc_coshf: case LibFunc_sinhl: case LibFunc_coshl: // FIXME: These boundaries are slightly conservative. if (OpC->getType()->isDoubleTy()) return !(Op < APFloat(-710.0) || Op > APFloat(710.0)); if (OpC->getType()->isFloatTy()) return !(Op < APFloat(-89.0f) || Op > APFloat(89.0f)); break; case LibFunc_sqrtl: case LibFunc_sqrt: case LibFunc_sqrtf: return Op.isNaN() || Op.isZero() || !Op.isNegative(); // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p, // maybe others? default: break; } } } if (Call->arg_size() == 2) { ConstantFP *Op0C = dyn_cast(Call->getArgOperand(0)); ConstantFP *Op1C = dyn_cast(Call->getArgOperand(1)); if (Op0C && Op1C) { const APFloat &Op0 = Op0C->getValueAPF(); const APFloat &Op1 = Op1C->getValueAPF(); switch (Func) { case LibFunc_powl: case LibFunc_pow: case LibFunc_powf: { // FIXME: Stop using the host math library. // FIXME: The computation isn't done in the right precision. 
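// Illustrative sketch (editor's aside, not part of the patch): the
// two-argument conditions checked a few lines below, in plain C++.
// fmod/remainder cannot fail when either input is NaN, or when x is finite
// and y is non-zero; atan2 is only treated as removable when the arguments
// are not both zero, because C11/POSIX allow a domain error for atan2(0, 0).
#include <cmath>

static bool fmodIsNoop(double X, double Y) {
  // Mirrors: Op0.isNaN() || Op1.isNaN() || (!Op0.isInfinity() && !Op1.isZero())
  return std::isnan(X) || std::isnan(Y) || (!std::isinf(X) && Y != 0.0);
}

static bool atan2IsNoop(double X, double Y) {
  // Mirrors: !Op0.isZero() || !Op1.isZero()
  return X != 0.0 || Y != 0.0;
}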
Type *Ty = Op0C->getType(); if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) { if (Ty == Op1C->getType()) return ConstantFoldBinaryFP(pow, Op0, Op1, Ty) != nullptr; } break; } case LibFunc_fmodl: case LibFunc_fmod: case LibFunc_fmodf: case LibFunc_remainderl: case LibFunc_remainder: case LibFunc_remainderf: return Op0.isNaN() || Op1.isNaN() || (!Op0.isInfinity() && !Op1.isZero()); case LibFunc_atan2: case LibFunc_atan2f: case LibFunc_atan2l: // Although IEEE-754 says atan2(+/-0.0, +/-0.0) are well-defined, and // GLIBC and MSVC do not appear to raise an error on those, we // cannot rely on that behavior. POSIX and C11 say that a domain error // may occur, so allow for that possibility. return !Op0.isZero() || !Op1.isZero(); default: break; } } } return false; } void TargetFolder::anchor() {} diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h index 00239ff94b7b..6d301efd5618 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMSubtarget.h @@ -1,502 +1,504 @@ //===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file declares the ARM specific subclass of TargetSubtargetInfo. // //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H #define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSelectionDAGInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/TargetParser/Triple.h" #include #include #include #define GET_SUBTARGETINFO_HEADER #include "ARMGenSubtargetInfo.inc" namespace llvm { class ARMBaseTargetMachine; class GlobalValue; class StringRef; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { Others, #define ARM_PROCESSOR_FAMILY(ENUM) ENUM, #include "llvm/TargetParser/ARMTargetParserDef.inc" #undef ARM_PROCESSOR_FAMILY }; enum ARMProcClassEnum { None, AClass, MClass, RClass }; enum ARMArchEnum { #define ARM_ARCHITECTURE(ENUM) ENUM, #include "llvm/TargetParser/ARMTargetParserDef.inc" #undef ARM_ARCHITECTURE }; public: /// What kind of timing do load multiple/store multiple instructions have. enum ARMLdStMultipleTiming { /// Can load/store 2 registers/cycle. DoubleIssue, /// Can load/store 2 registers/cycle, but needs an extra cycle if the access /// is not 64-bit aligned. DoubleIssueCheckUnalignedAccess, /// Can load/store 1 register/cycle. 
SingleIssue, /// Can load/store 1 register/cycle, but needs an extra cycle for address /// computation and potentially also for register writeback. SingleIssuePlusExtras, }; protected: // Bool members corresponding to the SubtargetFeatures defined in tablegen #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ bool ATTRIBUTE = DEFAULT; #include "ARMGenSubtargetInfo.inc" /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. ARMProcFamilyEnum ARMProcFamily = Others; /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass. ARMProcClassEnum ARMProcClass = None; /// ARMArch - ARM architecture ARMArchEnum ARMArch = ARMv4t; /// UseMulOps - True if non-microcoded fused integer multiply-add and /// multiply-subtract instructions should be used. bool UseMulOps = false; /// SupportsTailCall - True if the OS supports tail call. The dynamic linker /// must be able to synthesize call stubs for interworking between ARM and /// Thumb. bool SupportsTailCall = false; /// RestrictIT - If true, the subtarget disallows generation of complex IT /// blocks. bool RestrictIT = false; /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS). bool UseSjLjEH = false; /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. Align stackAlignment = Align(4); /// CPUString - String name of used CPU. std::string CPUString; unsigned MaxInterleaveFactor = 1; /// Clearance before partial register updates (in number of instructions) unsigned PartialUpdateClearance = 0; /// What kind of timing do load multiple/store multiple have (double issue, /// single issue etc). ARMLdStMultipleTiming LdStMultipleTiming = SingleIssue; /// The adjustment that we need to apply to get the operand latency from the /// operand cycle returned by the itinerary data for pre-ISel operands. int PreISelOperandLatencyAdjustment = 2; /// What alignment is preferred for loop bodies and functions, in log2(bytes). unsigned PrefLoopLogAlignment = 0; /// The cost factor for MVE instructions, representing the multiple beats an // instruction can take. The default is 2, (set in initSubtargetFeatures so // that we can use subtarget features less than 2). unsigned MVEVectorCostFactor = 0; /// OptMinSize - True if we're optimising for minimum code size, equal to /// the function attribute. bool OptMinSize = false; /// IsLittle - The target is Little Endian bool IsLittle; /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; /// SchedModel - Processor specific instruction costs. MCSchedModel SchedModel; /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; /// Options passed via command line that could influence the target const TargetOptions &Options; const ARMBaseTargetMachine &TM; public: /// This constructor initializes the data members to match that /// of the specified triple. /// ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle, bool MinSize = false); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. unsigned getMaxInlineSizeThreshold() const { return 64; } /// getMaxMemcpyTPInlineSizeThreshold - Returns the maximum size /// that still makes it profitable to inline a llvm.memcpy as a Tail /// Predicated loop. /// This threshold should only be used for constant size inputs. 
unsigned getMaxMemcpyTPInlineSizeThreshold() const { return 128; } /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS); /// initializeSubtargetDependencies - Initializes using a CPU and feature string /// so that we can use initializer lists for subtarget initialization. ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; } const ARMBaseInstrInfo *getInstrInfo() const override { return InstrInfo.get(); } const ARMTargetLowering *getTargetLowering() const override { return &TLInfo; } const ARMFrameLowering *getFrameLowering() const override { return FrameLowering.get(); } const ARMBaseRegisterInfo *getRegisterInfo() const override { return &InstrInfo->getRegisterInfo(); } /// The correct instructions have been implemented to initialize undef /// registers, therefore the ARM Architecture is supported by the Init Undef /// Pass. This will return true as the pass needs to be supported for all /// types of instructions. The pass will then perform more checks to ensure it /// should be applying the Pseudo Instructions. bool supportsInitUndef() const override { return true; } const CallLowering *getCallLowering() const override; InstructionSelector *getInstructionSelector() const override; const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; private: ARMSelectionDAGInfo TSInfo; // Either Thumb1FrameLowering or ARMFrameLowering. std::unique_ptr FrameLowering; // Either Thumb1InstrInfo or Thumb2InstrInfo. std::unique_ptr InstrInfo; ARMTargetLowering TLInfo; /// GlobalISel related APIs. std::unique_ptr CallLoweringInfo; std::unique_ptr InstSelector; std::unique_ptr Legalizer; std::unique_ptr RegBankInfo; void initializeEnvironment(); void initSubtargetFeatures(StringRef CPU, StringRef FS); ARMFrameLowering *initializeFrameLowering(StringRef CPU, StringRef FS); std::bitset<8> CoprocCDE = {}; public: // Getters for SubtargetFeatures defined in tablegen #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ bool GETTER() const { return ATTRIBUTE; } #include "ARMGenSubtargetInfo.inc" /// @{ /// These functions are obsolete, please consider adding subtarget features /// or properties instead of calling them. 
bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA7() const { return ARMProcFamily == CortexA7; } bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } bool isCortexA15() const { return ARMProcFamily == CortexA15; } bool isSwift() const { return ARMProcFamily == Swift; } bool isCortexM3() const { return ARMProcFamily == CortexM3; } bool isCortexM7() const { return ARMProcFamily == CortexM7; } bool isLikeA9() const { return isCortexA9() || isCortexA15() || isKrait(); } bool isCortexR5() const { return ARMProcFamily == CortexR5; } bool isKrait() const { return ARMProcFamily == Krait; } /// @} bool hasARMOps() const { return !NoARM; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && hasNEONForFP(); } bool hasVFP2Base() const { return hasVFPv2SP(); } bool hasVFP3Base() const { return hasVFPv3D16SP(); } bool hasVFP4Base() const { return hasVFPv4D16SP(); } bool hasFPARMv8Base() const { return hasFPARMv8D16SP(); } bool hasAnyDataBarrier() const { return HasDataBarrier || (hasV6Ops() && !isThumb()); } bool useMulOps() const { return UseMulOps; } bool useFPVMLx() const { return !SlowFPVMLx; } bool useFPVFMx() const { return !isTargetDarwin() && hasVFP4Base() && !SlowFPVFMx; } bool useFPVFMx16() const { return useFPVFMx() && hasFullFP16(); } bool useFPVFMx64() const { return useFPVFMx() && hasFP64(); } bool useSjLjEH() const { return UseSjLjEH; } bool hasBaseDSP() const { if (isThumb()) return hasThumb2() && hasDSP(); else return hasV5TEOps(); } /// Return true if the CPU supports any kind of instruction fusion. bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); } const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); } bool isTargetWatchABI() const { return TargetTriple.isWatchABI(); } bool isTargetDriverKit() const { return TargetTriple.isDriverKit(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } bool isTargetWindows() const { return TargetTriple.isOSWindows(); } bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } // ARM EABI is the bare-metal EABI described in ARM ABI documents and // can be accessed via -target arm-none-eabi. This is NOT GNUEABI. // FIXME: Add a flag for bare-metal for that target and set Triple::EABI // even for GNUEABI, so we can make a distinction here and still conform to // the EABI on GNU (and Android) mode. This requires change in Clang, too. // FIXME: The Darwin exception is temporary, while we move users to // "*-*-*-macho" triples as quickly as possible. 
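// Illustrative sketch (editor's aside, not part of the patch): the
// environment classification this change extends. The GNUEABIT64 and
// GNUEABIHFT64 environments (glibc built with 64-bit time_t) are treated
// exactly like their GNUEABI/GNUEABIHF counterparts for ABI purposes; the
// "HF" variants are the ones that additionally imply the hard-float calling
// convention. Enumerator names assume the llvm::Triple of the LLVM revision
// this patch targets.
#include "llvm/TargetParser/Triple.h"

static bool isGNUFlavouredEABI(const llvm::Triple &TT) {
  switch (TT.getEnvironment()) {
  case llvm::Triple::GNUEABI:
  case llvm::Triple::GNUEABIT64:
  case llvm::Triple::GNUEABIHF:
  case llvm::Triple::GNUEABIHFT64:
    return !TT.isOSDarwin() && !TT.isOSWindows();
  default:
    return false;
  }
}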
bool isTargetAEABI() const { return (TargetTriple.getEnvironment() == Triple::EABI || TargetTriple.getEnvironment() == Triple::EABIHF) && !isTargetDarwin() && !isTargetWindows(); } bool isTargetGNUAEABI() const { return (TargetTriple.getEnvironment() == Triple::GNUEABI || - TargetTriple.getEnvironment() == Triple::GNUEABIHF) && + TargetTriple.getEnvironment() == Triple::GNUEABIT64 || + TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::GNUEABIHFT64) && !isTargetDarwin() && !isTargetWindows(); } bool isTargetMuslAEABI() const { return (TargetTriple.getEnvironment() == Triple::MuslEABI || TargetTriple.getEnvironment() == Triple::MuslEABIHF || TargetTriple.getEnvironment() == Triple::OpenHOS) && !isTargetDarwin() && !isTargetWindows(); } // ARM Targets that support EHABI exception handling standard // Darwin uses SjLj. Other targets might need more checks. bool isTargetEHABICompatible() const { return TargetTriple.isTargetEHABICompatible(); } bool isTargetHardFloat() const; bool isReadTPSoft() const { return !(isReadTPTPIDRURW() || isReadTPTPIDRURO() || isReadTPTPIDRPRW()); } bool isTargetAndroid() const { return TargetTriple.isAndroid(); } bool isXRaySupported() const override; bool isAPCS_ABI() const; bool isAAPCS_ABI() const; bool isAAPCS16_ABI() const; bool isROPI() const; bool isRWPI() const; bool useMachineScheduler() const { return UseMISched; } bool useMachinePipeliner() const { return UseMIPipeliner; } bool hasMinSize() const { return OptMinSize; } bool isThumb1Only() const { return isThumb() && !hasThumb2(); } bool isThumb2() const { return isThumb() && hasThumb2(); } bool isMClass() const { return ARMProcClass == MClass; } bool isRClass() const { return ARMProcClass == RClass; } bool isAClass() const { return ARMProcClass == AClass; } bool isR9Reserved() const { return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9; } MCPhysReg getFramePointerReg() const { if (isTargetDarwin() || (!isTargetWindows() && isThumb() && !createAAPCSFrameChain())) return ARM::R7; return ARM::R11; } /// Returns true if the frame setup is split into two separate pushes (first /// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent /// to lr. This is always required on Thumb1-only targets, as the push and /// pop instructions can't access the high registers. bool splitFramePushPop(const MachineFunction &MF) const { if (MF.getInfo()->shouldSignReturnAddress()) return true; return (getFramePointerReg() == ARM::R7 && MF.getTarget().Options.DisableFramePointerElim(MF)) || isThumb1Only(); } bool splitFramePointerPush(const MachineFunction &MF) const; bool useStride4VFPs() const; bool useMovt() const; bool supportsTailCall() const { return SupportsTailCall; } bool allowsUnalignedMem() const { return !StrictAlign; } bool restrictIT() const { return RestrictIT; } const std::string & getCPUString() const { return CPUString; } bool isLittle() const { return IsLittle; } unsigned getMispredictionPenalty() const; /// Returns true if machine scheduler should be enabled. bool enableMachineScheduler() const override; /// Returns true if machine pipeliner should be enabled. bool enableMachinePipeliner() const override; bool useDFAforSMS() const override; /// True for some subtargets at > -O0. bool enablePostRAScheduler() const override; /// True for some subtargets at > -O0. bool enablePostRAMachineScheduler() const override; /// Check whether this subtarget wants to use subregister liveness. 
bool enableSubRegLiveness() const override; /// Enable use of alias analysis during code generation (during MI /// scheduling, DAGCombine, etc.). bool useAA() const override { return true; } /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. const InstrItineraryData *getInstrItineraryData() const override { return &InstrItins; } /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. Align getStackAlignment() const { return stackAlignment; } // Returns the required alignment for LDRD/STRD instructions Align getDualLoadStoreAlignment() const { return Align(hasV7Ops() || allowsUnalignedMem() ? 4 : 8); } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } unsigned getPartialUpdateClearance() const { return PartialUpdateClearance; } ARMLdStMultipleTiming getLdStMultipleTiming() const { return LdStMultipleTiming; } int getPreISelOperandLatencyAdjustment() const { return PreISelOperandLatencyAdjustment; } /// True if the GV will be accessed via an indirect symbol. bool isGVIndirectSymbol(const GlobalValue *GV) const; /// Returns the constant pool modifier needed to access the GV. bool isGVInGOT(const GlobalValue *GV) const; /// True if fast-isel is used. bool useFastISel() const; /// Returns the correct return opcode for the current feature set. /// Use BX if available to allow mixing thumb/arm code, but fall back /// to plain mov pc,lr on ARMv4. unsigned getReturnOpcode() const { if (isThumb()) return ARM::tBX_RET; if (hasV4TOps()) return ARM::BX_RET; return ARM::MOVPCLR; } /// Allow movt+movw for PIC global address calculation. /// ELF does not have GOT relocations for movt+movw. /// ROPI does not use GOT. bool allowPositionIndependentMovt() const { return isROPI() || !isTargetELF(); } unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; } unsigned getMVEVectorCostFactor(TargetTransformInfo::TargetCostKind CostKind) const { if (CostKind == TargetTransformInfo::TCK_CodeSize) return 1; return MVEVectorCostFactor; } bool ignoreCSRForAllocationOrder(const MachineFunction &MF, unsigned PhysReg) const override; unsigned getGPRAllocationOrder(const MachineFunction &MF) const; }; } // end namespace llvm #endif // LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 7553778c5740..a58c63dcf762 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -1,642 +1,644 @@ //===-- ARMTargetMachine.cpp - Define TargetMachine for ARM ---------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // //===----------------------------------------------------------------------===// #include "ARMTargetMachine.h" #include "ARM.h" #include "ARMMachineFunctionInfo.h" #include "ARMMacroFusion.h" #include "ARMSubtarget.h" #include "ARMTargetObjectFile.h" #include "ARMTargetTransformInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "TargetInfo/ARMTargetInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/ExecutionDomainFix.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterBankInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/TargetParser/ARMTargetParser.h" #include "llvm/TargetParser/TargetParser.h" #include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/CFGuard.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" #include #include #include #include using namespace llvm; static cl::opt DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden, cl::desc("Inhibit optimization of S->D register accesses on A15"), cl::init(false)); static cl::opt EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden, cl::desc("Run SimplifyCFG after expanding atomic operations" " to make use of cmpxchg flow-based information"), cl::init(true)); static cl::opt EnableARMLoadStoreOpt("arm-load-store-opt", cl::Hidden, cl::desc("Enable ARM load/store optimization pass"), cl::init(true)); // FIXME: Unify control over GlobalMerge. static cl::opt EnableGlobalMerge("arm-global-merge", cl::Hidden, cl::desc("Enable the global merge pass")); namespace llvm { void initializeARMExecutionDomainFixPass(PassRegistry&); } extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() { // Register the target. 
RegisterTargetMachine X(getTheARMLETarget()); RegisterTargetMachine A(getTheThumbLETarget()); RegisterTargetMachine Y(getTheARMBETarget()); RegisterTargetMachine B(getTheThumbBETarget()); PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeGlobalISel(Registry); initializeARMLoadStoreOptPass(Registry); initializeARMPreAllocLoadStoreOptPass(Registry); initializeARMParallelDSPPass(Registry); initializeARMBranchTargetsPass(Registry); initializeARMConstantIslandsPass(Registry); initializeARMExecutionDomainFixPass(Registry); initializeARMExpandPseudoPass(Registry); initializeThumb2SizeReducePass(Registry); initializeMVEVPTBlockPass(Registry); initializeMVETPAndVPTOptimisationsPass(Registry); initializeMVETailPredicationPass(Registry); initializeARMLowOverheadLoopsPass(Registry); initializeARMBlockPlacementPass(Registry); initializeMVEGatherScatterLoweringPass(Registry); initializeARMSLSHardeningPass(Registry); initializeMVELaneInterleavingPass(Registry); initializeARMFixCortexA57AES1742098Pass(Registry); initializeARMDAGToDAGISelLegacyPass(Registry); } static std::unique_ptr createTLOF(const Triple &TT) { if (TT.isOSBinFormatMachO()) return std::make_unique(); if (TT.isOSWindows()) return std::make_unique(); return std::make_unique(); } static ARMBaseTargetMachine::ARMABI computeTargetABI(const Triple &TT, StringRef CPU, const TargetOptions &Options) { StringRef ABIName = Options.MCOptions.getABIName(); if (ABIName.empty()) ABIName = ARM::computeDefaultTargetABI(TT, CPU); if (ABIName == "aapcs16") return ARMBaseTargetMachine::ARM_ABI_AAPCS16; else if (ABIName.starts_with("aapcs")) return ARMBaseTargetMachine::ARM_ABI_AAPCS; else if (ABIName.starts_with("apcs")) return ARMBaseTargetMachine::ARM_ABI_APCS; llvm_unreachable("Unhandled/unknown ABI Name!"); return ARMBaseTargetMachine::ARM_ABI_UNKNOWN; } static std::string computeDataLayout(const Triple &TT, StringRef CPU, const TargetOptions &Options, bool isLittle) { auto ABI = computeTargetABI(TT, CPU, Options); std::string Ret; if (isLittle) // Little endian. Ret += "e"; else // Big endian. Ret += "E"; Ret += DataLayout::getManglingComponent(TT); // Pointers are 32 bits and aligned to 32 bits. Ret += "-p:32:32"; // Function pointers are aligned to 8 bits (because the LSB stores the // ARM/Thumb state). Ret += "-Fi8"; // ABIs other than APCS have 64 bit integers with natural alignment. if (ABI != ARMBaseTargetMachine::ARM_ABI_APCS) Ret += "-i64:64"; // We have 64 bits floats. The APCS ABI requires them to be aligned to 32 // bits, others to 64 bits. We always try to align to 64 bits. if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS) Ret += "-f64:32:64"; // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others // to 64. We always ty to give them natural alignment. if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS) Ret += "-v64:32:64-v128:32:128"; else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16) Ret += "-v128:64:128"; // Try to align aggregates to 32 bits (the default is 64 bits, which has no // particular hardware support on 32-bit ARM). Ret += "-a:0:32"; // Integer registers are 32 bits. Ret += "-n32"; // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit // aligned everywhere else. 
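// Illustrative sketch (editor's aside, not part of the patch): two strings
// that computeDataLayout assembles, checked against llvm::DataLayout. The
// exact strings are derived from the rules above (endianness, ELF mangling,
// 32-bit pointers, Fi8, per-ABI i64/f64/vector/stack alignment) and are meant
// as an illustration rather than an authoritative reference.
#include "llvm/IR/DataLayout.h"
#include <cassert>

static void checkArmDataLayouts() {
  // Little-endian AAPCS on ELF (e.g. armv7-unknown-linux-gnueabihf).
  llvm::DataLayout AAPCS(
      "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64");
  assert(AAPCS.isLittleEndian() && AAPCS.getPointerSizeInBits() == 32);
  // Legacy APCS: i64/f64 and vectors only 32-bit aligned, 32-bit stack.
  llvm::DataLayout APCS(
      "e-m:e-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32");
  assert(APCS.getStackAlignment().value() == 4);
}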
if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16) Ret += "-S128"; else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS) Ret += "-S64"; else Ret += "-S32"; return Ret; } static Reloc::Model getEffectiveRelocModel(const Triple &TT, std::optional RM) { if (!RM) // Default relocation model on Darwin is PIC. return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static; if (*RM == Reloc::ROPI || *RM == Reloc::RWPI || *RM == Reloc::ROPI_RWPI) assert(TT.isOSBinFormatELF() && "ROPI/RWPI currently only supported for ELF"); // DynamicNoPIC is only used on darwin. if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin()) return Reloc::Static; return *RM; } /// Create an ARM architecture model. /// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool isLittle) : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM), getEffectiveCodeModel(CM, CodeModel::Small), OL), TargetABI(computeTargetABI(TT, CPU, Options)), TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) { // Default to triple-appropriate float ABI if (Options.FloatABIType == FloatABI::Default) { if (isTargetHardFloat()) this->Options.FloatABIType = FloatABI::Hard; else this->Options.FloatABIType = FloatABI::Soft; } // Default to triple-appropriate EABI if (Options.EABIVersion == EABI::Default || Options.EABIVersion == EABI::Unknown) { // musl is compatible with glibc with regard to EABI version if ((TargetTriple.getEnvironment() == Triple::GNUEABI || + TargetTriple.getEnvironment() == Triple::GNUEABIT64 || TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::GNUEABIHFT64 || TargetTriple.getEnvironment() == Triple::MuslEABI || TargetTriple.getEnvironment() == Triple::MuslEABIHF || TargetTriple.getEnvironment() == Triple::OpenHOS) && !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin())) this->Options.EABIVersion = EABI::GNU; else this->Options.EABIVersion = EABI::EABI5; } if (TT.isOSBinFormatMachO()) { this->Options.TrapUnreachable = true; this->Options.NoTrapAfterNoreturn = true; } // ARM supports the debug entry values. setSupportsDebugEntryValues(true); initAsmInfo(); // ARM supports the MachineOutliner. setMachineOutliner(true); setSupportsDefaultOutlining(true); } ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; MachineFunctionInfo *ARMBaseTargetMachine::createMachineFunctionInfo( BumpPtrAllocator &Allocator, const Function &F, const TargetSubtargetInfo *STI) const { return ARMFunctionInfo::create( Allocator, F, static_cast(STI)); } const ARMSubtarget * ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); std::string CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; std::string FS = FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; // FIXME: This is related to the code below to reset the target options, // we need to know whether or not the soft float flag is set on the // function before we can generate a subtarget. We also need to use // it as a key for the subtarget since that can be the only difference // between two functions. bool SoftFloat = F.getFnAttribute("use-soft-float").getValueAsBool(); // If the soft float attribute is set on the function turn on the soft float // subtarget feature. 
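// Illustrative sketch (editor's aside, not part of the patch): how the
// per-function subtarget cache key is assembled just below. "+soft-float" is
// appended to the feature string itself, since it changes code generation,
// while "+minsize" only tags the cache key so that minsize and non-minsize
// functions get distinct ARMSubtarget instances for an otherwise identical
// CPU/feature combination.
#include <string>

static std::string subtargetCacheKey(std::string CPU, std::string FS,
                                     bool UseSoftFloat, bool MinSize) {
  if (UseSoftFloat)
    FS += FS.empty() ? "+soft-float" : ",+soft-float";
  std::string Key = CPU + FS;
  if (MinSize)
    Key += "+minsize";
  return Key;
}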
if (SoftFloat) FS += FS.empty() ? "+soft-float" : ",+soft-float"; // Use the optminsize to identify the subtarget, but don't use it in the // feature string. std::string Key = CPU + FS; if (F.hasMinSize()) Key += "+minsize"; auto &I = SubtargetMap[Key]; if (!I) { // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); I = std::make_unique(TargetTriple, CPU, FS, *this, isLittle, F.hasMinSize()); if (!I->isThumb() && !I->hasARMOps()) F.getContext().emitError("Function '" + F.getName() + "' uses ARM " "instructions, but the target does not support ARM mode execution."); } return I.get(); } TargetTransformInfo ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(ARMTTIImpl(this, F)); } ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} namespace { /// ARM Code Generator Pass Configuration Options. class ARMPassConfig : public TargetPassConfig { public: ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} ARMBaseTargetMachine &getARMTargetMachine() const { return getTM(); } ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { ScheduleDAGMILive *DAG = createGenericSchedLive(C); // add DAG Mutations here. const ARMSubtarget &ST = C->MF->getSubtarget(); if (ST.hasFusion()) DAG->addMutation(createARMMacroFusionDAGMutation()); return DAG; } ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { ScheduleDAGMI *DAG = createGenericSchedPostRA(C); // add DAG Mutations here. 
const ARMSubtarget &ST = C->MF->getSubtarget(); if (ST.hasFusion()) DAG->addMutation(createARMMacroFusionDAGMutation()); return DAG; } void addIRPasses() override; void addCodeGenPrepare() override; bool addPreISel() override; bool addInstSelector() override; bool addIRTranslator() override; bool addLegalizeMachineIR() override; bool addRegBankSelect() override; bool addGlobalInstructionSelect() override; void addPreRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; void addPreEmitPass2() override; std::unique_ptr getCSEConfig() const override; }; class ARMExecutionDomainFix : public ExecutionDomainFix { public: static char ID; ARMExecutionDomainFix() : ExecutionDomainFix(ID, ARM::DPRRegClass) {} StringRef getPassName() const override { return "ARM Execution Domain Fix"; } }; char ARMExecutionDomainFix::ID; } // end anonymous namespace INITIALIZE_PASS_BEGIN(ARMExecutionDomainFix, "arm-execution-domain-fix", "ARM Execution Domain Fix", false, false) INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis) INITIALIZE_PASS_END(ARMExecutionDomainFix, "arm-execution-domain-fix", "ARM Execution Domain Fix", false, false) TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { return new ARMPassConfig(*this, PM); } std::unique_ptr ARMPassConfig::getCSEConfig() const { return getStandardCSEConfigForOpt(TM->getOptLevel()); } void ARMPassConfig::addIRPasses() { if (TM->Options.ThreadModel == ThreadModel::Single) addPass(createLowerAtomicPass()); else addPass(createAtomicExpandLegacyPass()); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOptLevel::None && EnableAtomicTidy) addPass(createCFGSimplificationPass( SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true), [this](const Function &F) { const auto &ST = this->TM->getSubtarget(F); return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); })); addPass(createMVEGatherScatterLoweringPass()); addPass(createMVELaneInterleavingPass()); TargetPassConfig::addIRPasses(); // Run the parallel DSP pass. if (getOptLevel() == CodeGenOptLevel::Aggressive) addPass(createARMParallelDSPPass()); // Match complex arithmetic patterns if (TM->getOptLevel() >= CodeGenOptLevel::Default) addPass(createComplexDeinterleavingPass(TM)); // Match interleaved memory accesses to ldN/stN intrinsics. if (TM->getOptLevel() != CodeGenOptLevel::None) addPass(createInterleavedAccessPass()); // Add Control Flow Guard checks. if (TM->getTargetTriple().isOSWindows()) addPass(createCFGuardCheckPass()); if (TM->Options.JMCInstrument) addPass(createJMCInstrumenterPass()); } void ARMPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOptLevel::None) addPass(createTypePromotionLegacyPass()); TargetPassConfig::addCodeGenPrepare(); } bool ARMPassConfig::addPreISel() { if ((TM->getOptLevel() != CodeGenOptLevel::None && EnableGlobalMerge == cl::BOU_UNSET) || EnableGlobalMerge == cl::BOU_TRUE) { // FIXME: This is using the thumb1 only constant value for // maximal global offset for merging globals. We may want // to look into using the old value for non-thumb1 code of // 4095 based on the TargetMachine, but this starts to become // tricky when doing code gen per function. 
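// Illustrative sketch (editor's aside, not part of the patch): the decision
// encoded just below, with cl::boolOrDefault modelled as std::optional<bool>
// (unset / true / false). Global merging runs whenever we optimize unless it
// was explicitly disabled, is restricted to size-profitable merges below -O3
// when the option was left unset, and extern-global merging is avoided on
// Mach-O because of the .subsections_via_symbols directive.
#include <optional>

struct GlobalMergeChoice {
  bool Run;
  bool OnlyOptimizeForSize;
  bool MergeExternal;
};

static GlobalMergeChoice chooseGlobalMerge(bool Optimizing, bool Aggressive,
                                           std::optional<bool> Flag,
                                           bool IsMachO) {
  GlobalMergeChoice C;
  C.Run = Flag.value_or(Optimizing);
  C.OnlyOptimizeForSize = !Aggressive && !Flag.has_value();
  C.MergeExternal = !IsMachO;
  return C;
}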
bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOptLevel::Aggressive) && (EnableGlobalMerge == cl::BOU_UNSET); // Merging of extern globals is enabled by default on non-Mach-O as we // expect it to be generally either beneficial or harmless. On Mach-O it // is disabled as we emit the .subsections_via_symbols directive which // means that merging extern globals is not safe. bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO(); addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize, MergeExternalByDefault)); } if (TM->getOptLevel() != CodeGenOptLevel::None) { addPass(createHardwareLoopsLegacyPass()); addPass(createMVETailPredicationPass()); // FIXME: IR passes can delete address-taken basic blocks, deleting // corresponding blockaddresses. ARMConstantPoolConstant holds references to // address-taken basic blocks which can be invalidated if the function // containing the blockaddress has already been codegen'd and the basic // block is removed. Work around this by forcing all IR passes to run before // any ISel takes place. We should have a more principled way of handling // this. See D99707 for more details. addPass(createBarrierNoopPass()); } return false; } bool ARMPassConfig::addInstSelector() { addPass(createARMISelDag(getARMTargetMachine(), getOptLevel())); return false; } bool ARMPassConfig::addIRTranslator() { addPass(new IRTranslator(getOptLevel())); return false; } bool ARMPassConfig::addLegalizeMachineIR() { addPass(new Legalizer()); return false; } bool ARMPassConfig::addRegBankSelect() { addPass(new RegBankSelect()); return false; } bool ARMPassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect(getOptLevel())); return false; } void ARMPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOptLevel::None) { if (getOptLevel() == CodeGenOptLevel::Aggressive) addPass(&MachinePipelinerID); addPass(createMVETPAndVPTOptimisationsPass()); addPass(createMLxExpansionPass()); if (EnableARMLoadStoreOpt) addPass(createARMLoadStoreOptimizationPass(/* pre-register alloc */ true)); if (!DisableA15SDOptimization) addPass(createA15SDOptimizerPass()); } } void ARMPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOptLevel::None) { if (EnableARMLoadStoreOpt) addPass(createARMLoadStoreOptimizationPass()); addPass(new ARMExecutionDomainFix()); addPass(createBreakFalseDeps()); } // Expand some pseudo instructions into multiple instructions to allow // proper scheduling. addPass(createARMExpandPseudoPass()); if (getOptLevel() != CodeGenOptLevel::None) { // When optimising for size, always run the Thumb2SizeReduction pass before // IfConversion. Otherwise, check whether IT blocks are restricted // (e.g. in v8, IfConversion depends on Thumb instruction widths) addPass(createThumb2SizeReductionPass([this](const Function &F) { return this->TM->getSubtarget(F).hasMinSize() || this->TM->getSubtarget(F).restrictIT(); })); addPass(createIfConverter([](const MachineFunction &MF) { return !MF.getSubtarget().isThumb1Only(); })); } addPass(createThumb2ITBlockPass()); // Add both scheduling passes to give the subtarget an opportunity to pick // between them. if (getOptLevel() != CodeGenOptLevel::None) { addPass(&PostMachineSchedulerID); addPass(&PostRASchedulerID); } addPass(createMVEVPTBlockPass()); addPass(createARMIndirectThunks()); addPass(createARMSLSHardeningPass()); } void ARMPassConfig::addPreEmitPass() { addPass(createThumb2SizeReductionPass()); // Constant island pass work on unbundled instructions. 
addPass(createUnpackMachineBundles([](const MachineFunction &MF) { return MF.getSubtarget().isThumb2(); })); // Don't optimize barriers or block placement at -O0. if (getOptLevel() != CodeGenOptLevel::None) { addPass(createARMBlockPlacementPass()); addPass(createARMOptimizeBarriersPass()); } } void ARMPassConfig::addPreEmitPass2() { // Inserts fixup instructions before unsafe AES operations. Instructions may // be inserted at the start of blocks and at within blocks so this pass has to // come before those below. addPass(createARMFixCortexA57AES1742098Pass()); // Inserts BTIs at the start of functions and indirectly-called basic blocks, // so passes cannot add to the start of basic blocks once this has run. addPass(createARMBranchTargetsPass()); // Inserts Constant Islands. Block sizes cannot be increased after this point, // as this may push the branch ranges and load offsets of accessing constant // pools out of range.. addPass(createARMConstantIslandPass()); // Finalises Low-Overhead Loops. This replaces pseudo instructions with real // instructions, but the pseudos all have conservative sizes so that block // sizes will only be decreased by this pass. addPass(createARMLowOverheadLoopsPass()); if (TM->getTargetTriple().isOSWindows()) { // Identify valid longjmp targets for Windows Control Flow Guard. addPass(createCFGuardLongjmpPass()); // Identify valid eh continuation targets for Windows EHCont Guard. addPass(createEHContGuardCatchretPass()); } } yaml::MachineFunctionInfo * ARMBaseTargetMachine::createDefaultFuncInfoYAML() const { return new yaml::ARMFunctionInfo(); } yaml::MachineFunctionInfo * ARMBaseTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const { const auto *MFI = MF.getInfo(); return new yaml::ARMFunctionInfo(*MFI); } bool ARMBaseTargetMachine::parseMachineFunctionInfo( const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const { const auto &YamlMFI = static_cast(MFI); MachineFunction &MF = PFS.MF; MF.getInfo()->initializeBaseYamlFields(YamlMFI); return false; } diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h index 69d8fa8ada64..75ee50f0e93c 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -1,120 +1,121 @@ //===-- ARMTargetMachine.h - Define TargetMachine for ARM -------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file declares the ARM specific subclass of TargetMachine. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H #define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H #include "ARMSubtarget.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetMachine.h" #include #include namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { public: enum ARMABI { ARM_ABI_UNKNOWN, ARM_ABI_APCS, ARM_ABI_AAPCS, // ARM EABI ARM_ABI_AAPCS16 } TargetABI; protected: std::unique_ptr TLOF; bool isLittle; mutable StringMap> SubtargetMap; public: ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool isLittle); ~ARMBaseTargetMachine() override; const ARMSubtarget *getSubtargetImpl(const Function &F) const override; // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget, // subtargets are per-function entities based on the target-specific // attributes of each function. const ARMSubtarget *getSubtargetImpl() const = delete; bool isLittleEndian() const { return isLittle; } TargetTransformInfo getTargetTransformInfo(const Function &F) const override; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } bool isTargetHardFloat() const { return TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::GNUEABIHFT64 || TargetTriple.getEnvironment() == Triple::MuslEABIHF || TargetTriple.getEnvironment() == Triple::EABIHF || (TargetTriple.isOSBinFormatMachO() && TargetTriple.getSubArch() == Triple::ARMSubArch_v7em) || TargetTriple.isOSWindows() || TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; } bool targetSchedulesPostRAScheduling() const override { return true; }; MachineFunctionInfo * createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F, const TargetSubtargetInfo *STI) const override; /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Addrspacecasts are always noops. return true; } yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override; yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override; bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override; }; /// ARM/Thumb little endian target machine. /// class ARMLETargetMachine : public ARMBaseTargetMachine { public: ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT); }; /// ARM/Thumb big endian target machine. 
/// class ARMBETargetMachine : public ARMBaseTargetMachine { public: ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT); }; } // end namespace llvm #endif // LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H diff --git a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index 388d58a82214..c0bc1276967b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -1,534 +1,534 @@ //===-- AVRAsmBackend.cpp - AVR Asm Backend ------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements the AVRAsmBackend class. // //===----------------------------------------------------------------------===// #include "MCTargetDesc/AVRAsmBackend.h" #include "MCTargetDesc/AVRFixupKinds.h" #include "MCTargetDesc/AVRMCTargetDesc.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" // FIXME: we should be doing checks to make sure asm operands // are not out of bounds. namespace adjust { using namespace llvm; static void signed_width(unsigned Width, uint64_t Value, std::string Description, const MCFixup &Fixup, MCContext *Ctx = nullptr) { if (!isIntN(Width, Value)) { std::string Diagnostic = "out of range " + Description; int64_t Min = minIntN(Width); int64_t Max = maxIntN(Width); Diagnostic += " (expected an integer in the range " + std::to_string(Min) + " to " + std::to_string(Max) + ")"; if (Ctx) { Ctx->reportError(Fixup.getLoc(), Diagnostic); } else { llvm_unreachable(Diagnostic.c_str()); } } } static void unsigned_width(unsigned Width, uint64_t Value, std::string Description, const MCFixup &Fixup, MCContext *Ctx = nullptr) { if (!isUIntN(Width, Value)) { std::string Diagnostic = "out of range " + Description; int64_t Max = maxUIntN(Width); Diagnostic += " (expected an integer in the range 0 to " + std::to_string(Max) + ")"; if (Ctx) { Ctx->reportError(Fixup.getLoc(), Diagnostic); } else { llvm_unreachable(Diagnostic.c_str()); } } } /// Adjusts the value of a branch target before fixup application. static void adjustBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { // We have one extra bit of precision because the value is rightshifted by // one. unsigned_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx); // Rightshifts the value by one. AVR::fixups::adjustBranchTarget(Value); } /// Adjusts the value of a relative branch target before fixup application. static void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { + // Jumps are relative to the current instruction. 
+ Value -= 2; + // We have one extra bit of precision because the value is rightshifted by // one. signed_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx); // Rightshifts the value by one. AVR::fixups::adjustBranchTarget(Value); - - // Jumps are relative to the current instruction. - Value -= 1; } /// 22-bit absolute fixup. /// /// Resolves to: /// 1001 kkkk 010k kkkk kkkk kkkk 111k kkkk /// /// Offset of 0 (so the result is left shifted by 3 bits before application). static void fixup_call(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { adjustBranch(Size, Fixup, Value, Ctx); auto top = Value & (0xf00000 << 6); // the top four bits auto middle = Value & (0x1ffff << 5); // the middle 13 bits auto bottom = Value & 0x1f; // end bottom 5 bits Value = (top << 6) | (middle << 3) | (bottom << 0); } /// 7-bit PC-relative fixup. /// /// Resolves to: /// 0000 00kk kkkk k000 /// Offset of 0 (so the result is left shifted by 3 bits before application). static void fixup_7_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { adjustRelativeBranch(Size, Fixup, Value, Ctx); // Because the value may be negative, we must mask out the sign bits Value &= 0x7f; } /// 12-bit PC-relative fixup. /// Yes, the fixup is 12 bits even though the name says otherwise. /// /// Resolves to: /// 0000 kkkk kkkk kkkk /// Offset of 0 (so the result isn't left-shifted before application). static void fixup_13_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { adjustRelativeBranch(Size, Fixup, Value, Ctx); // Because the value may be negative, we must mask out the sign bits Value &= 0xfff; } /// 6-bit fixup for the immediate operand of the STD/LDD family of /// instructions. /// /// Resolves to: /// 10q0 qq10 0000 1qqq static void fixup_6(const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { unsigned_width(6, Value, std::string("immediate"), Fixup, Ctx); Value = ((Value & 0x20) << 8) | ((Value & 0x18) << 7) | (Value & 0x07); } /// 6-bit fixup for the immediate operand of the ADIW family of /// instructions. /// /// Resolves to: /// 0000 0000 kk00 kkkk static void fixup_6_adiw(const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { unsigned_width(6, Value, std::string("immediate"), Fixup, Ctx); Value = ((Value & 0x30) << 2) | (Value & 0x0f); } /// 5-bit port number fixup on the SBIC family of instructions. /// /// Resolves to: /// 0000 0000 AAAA A000 static void fixup_port5(const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { unsigned_width(5, Value, std::string("port number"), Fixup, Ctx); Value &= 0x1f; Value <<= 3; } /// 6-bit port number fixup on the `IN` family of instructions. /// /// Resolves to: /// 1011 0AAd dddd AAAA static void fixup_port6(const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { unsigned_width(6, Value, std::string("port number"), Fixup, Ctx); Value = ((Value & 0x30) << 5) | (Value & 0x0f); } /// 7-bit data space address fixup for the LDS/STS instructions on AVRTiny. /// /// Resolves to: /// 1010 ikkk dddd kkkk static void fixup_lds_sts_16(const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { unsigned_width(7, Value, std::string("immediate"), Fixup, Ctx); Value = ((Value & 0x70) << 8) | (Value & 0x0f); } /// Adjusts a program memory address. /// This is a simple right-shift. static void pm(uint64_t &Value) { Value >>= 1; } /// Fixups relating to the LDI instruction. 
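// Worked example of the adjusted AVR PC-relative branch fixup above: the byte
// distance is first rebased onto the following instruction (the relocated
// `Value -= 2`), range-checked with one extra bit of precision, and only then
// converted from bytes to flash words. A self-contained re-computation in
// plain C++ (the MC code carries Value as an unsigned 64-bit quantity; a
// signed value is used here for clarity):

#include <cassert>
#include <cstdint>

// 7-bit field of a BRcc/fixup_7_pcrel for a target `delta` bytes away from the
// branch instruction itself.
static uint64_t encodeRelative7(int64_t delta) {
  int64_t v = delta - 2;                   // relative to the next instruction
  assert(v >= -(1 << 7) && v < (1 << 7));  // signed_width(7 + 1, ...) in bytes
  v >>= 1;                                 // branch targets are word offsets
  return static_cast<uint64_t>(v) & 0x7f;  // fixup_7_pcrel masks the sign bits
}

int main() {
  assert(encodeRelative7(12) == 5);     // 12 bytes ahead -> 5 words
  assert(encodeRelative7(2) == 0);      // branch to the very next instruction
  assert(encodeRelative7(-4) == 0x7d);  // 3 words back, two's complement in 7 bits
}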
namespace ldi { /// Adjusts a value to fix up the immediate of an `LDI Rd, K` instruction. /// /// Resolves to: /// 0000 KKKK 0000 KKKK /// Offset of 0 (so the result isn't left-shifted before application). static void fixup(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { uint64_t upper = Value & 0xf0; uint64_t lower = Value & 0x0f; Value = (upper << 4) | lower; } static void neg(uint64_t &Value) { Value *= -1; } static void lo8(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { Value &= 0xff; ldi::fixup(Size, Fixup, Value, Ctx); } static void hi8(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { Value = (Value & 0xff00) >> 8; ldi::fixup(Size, Fixup, Value, Ctx); } static void hh8(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { Value = (Value & 0xff0000) >> 16; ldi::fixup(Size, Fixup, Value, Ctx); } static void ms8(unsigned Size, const MCFixup &Fixup, uint64_t &Value, MCContext *Ctx = nullptr) { Value = (Value & 0xff000000) >> 24; ldi::fixup(Size, Fixup, Value, Ctx); } } // namespace ldi } // namespace adjust namespace llvm { // Prepare value for the target space for it void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, const MCValue &Target, uint64_t &Value, MCContext *Ctx) const { // The size of the fixup in bits. uint64_t Size = AVRAsmBackend::getFixupKindInfo(Fixup.getKind()).TargetSize; unsigned Kind = Fixup.getKind(); switch (Kind) { default: llvm_unreachable("unhandled fixup"); case AVR::fixup_7_pcrel: adjust::fixup_7_pcrel(Size, Fixup, Value, Ctx); break; case AVR::fixup_13_pcrel: adjust::fixup_13_pcrel(Size, Fixup, Value, Ctx); break; case AVR::fixup_call: adjust::fixup_call(Size, Fixup, Value, Ctx); break; case AVR::fixup_ldi: adjust::ldi::fixup(Size, Fixup, Value, Ctx); break; case AVR::fixup_lo8_ldi: adjust::ldi::lo8(Size, Fixup, Value, Ctx); break; case AVR::fixup_lo8_ldi_pm: case AVR::fixup_lo8_ldi_gs: adjust::pm(Value); adjust::ldi::lo8(Size, Fixup, Value, Ctx); break; case AVR::fixup_hi8_ldi: adjust::ldi::hi8(Size, Fixup, Value, Ctx); break; case AVR::fixup_hi8_ldi_pm: case AVR::fixup_hi8_ldi_gs: adjust::pm(Value); adjust::ldi::hi8(Size, Fixup, Value, Ctx); break; case AVR::fixup_hh8_ldi: case AVR::fixup_hh8_ldi_pm: if (Kind == AVR::fixup_hh8_ldi_pm) adjust::pm(Value); adjust::ldi::hh8(Size, Fixup, Value, Ctx); break; case AVR::fixup_ms8_ldi: adjust::ldi::ms8(Size, Fixup, Value, Ctx); break; case AVR::fixup_lo8_ldi_neg: case AVR::fixup_lo8_ldi_pm_neg: if (Kind == AVR::fixup_lo8_ldi_pm_neg) adjust::pm(Value); adjust::ldi::neg(Value); adjust::ldi::lo8(Size, Fixup, Value, Ctx); break; case AVR::fixup_hi8_ldi_neg: case AVR::fixup_hi8_ldi_pm_neg: if (Kind == AVR::fixup_hi8_ldi_pm_neg) adjust::pm(Value); adjust::ldi::neg(Value); adjust::ldi::hi8(Size, Fixup, Value, Ctx); break; case AVR::fixup_hh8_ldi_neg: case AVR::fixup_hh8_ldi_pm_neg: if (Kind == AVR::fixup_hh8_ldi_pm_neg) adjust::pm(Value); adjust::ldi::neg(Value); adjust::ldi::hh8(Size, Fixup, Value, Ctx); break; case AVR::fixup_ms8_ldi_neg: adjust::ldi::neg(Value); adjust::ldi::ms8(Size, Fixup, Value, Ctx); break; case AVR::fixup_16: adjust::unsigned_width(16, Value, std::string("port number"), Fixup, Ctx); Value &= 0xffff; break; case AVR::fixup_16_pm: Value >>= 1; // Flash addresses are always shifted. 
adjust::unsigned_width(16, Value, std::string("port number"), Fixup, Ctx); Value &= 0xffff; break; case AVR::fixup_6: adjust::fixup_6(Fixup, Value, Ctx); break; case AVR::fixup_6_adiw: adjust::fixup_6_adiw(Fixup, Value, Ctx); break; case AVR::fixup_port5: adjust::fixup_port5(Fixup, Value, Ctx); break; case AVR::fixup_port6: adjust::fixup_port6(Fixup, Value, Ctx); break; case AVR::fixup_lds_sts_16: adjust::fixup_lds_sts_16(Fixup, Value, Ctx); break; // Fixups which do not require adjustments. case FK_Data_1: case FK_Data_2: case FK_Data_4: case FK_Data_8: break; case FK_GPRel_4: llvm_unreachable("don't know how to adjust this fixup"); break; } } std::unique_ptr AVRAsmBackend::createObjectTargetWriter() const { return createAVRELFObjectWriter(MCELFObjectTargetWriter::getOSABI(OSType)); } void AVRAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef Data, uint64_t Value, bool IsResolved, const MCSubtargetInfo *STI) const { if (Fixup.getKind() >= FirstLiteralRelocationKind) return; adjustFixupValue(Fixup, Target, Value, &Asm.getContext()); if (Value == 0) return; // Doesn't change encoding. MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind()); // The number of bits in the fixup mask auto NumBits = Info.TargetSize + Info.TargetOffset; auto NumBytes = (NumBits / 8) + ((NumBits % 8) == 0 ? 0 : 1); // Shift the value into position. Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. for (unsigned i = 0; i < NumBytes; ++i) { uint8_t mask = (((Value >> (i * 8)) & 0xff)); Data[Offset + i] |= mask; } } std::optional AVRAsmBackend::getFixupKind(StringRef Name) const { unsigned Type; Type = llvm::StringSwitch(Name) #define ELF_RELOC(X, Y) .Case(#X, Y) #include "llvm/BinaryFormat/ELFRelocs/AVR.def" #undef ELF_RELOC .Case("BFD_RELOC_NONE", ELF::R_AVR_NONE) .Case("BFD_RELOC_16", ELF::R_AVR_16) .Case("BFD_RELOC_32", ELF::R_AVR_32) .Default(-1u); if (Type != -1u) return static_cast(FirstLiteralRelocationKind + Type); return std::nullopt; } MCFixupKindInfo const &AVRAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { // NOTE: Many AVR fixups work on sets of non-contignous bits. We work around // this by saying that the fixup is the size of the entire instruction. const static MCFixupKindInfo Infos[AVR::NumTargetFixupKinds] = { // This table *must* be in same the order of fixup_* kinds in // AVRFixupKinds.h. 
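// applyFixup() above never clears bits: it shifts the already-adjusted value
// into place by TargetOffset and ORs it into the little-endian instruction
// bytes, relying on the encoder having left the fixed-up bit positions zero.
// A standalone sketch of that byte loop (plain containers, same arithmetic):

#include <cassert>
#include <cstdint>
#include <vector>

static void orInFixup(std::vector<uint8_t> &Data, unsigned Offset,
                      uint64_t Value, unsigned TargetOffset,
                      unsigned TargetSize) {
  unsigned NumBits = TargetSize + TargetOffset;
  unsigned NumBytes = (NumBits + 7) / 8;  // same rounding as the code above
  Value <<= TargetOffset;                 // move the field into position
  assert(Offset + NumBytes <= Data.size() && "fixup runs past the fragment");
  for (unsigned i = 0; i < NumBytes; ++i)
    Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
}

int main() {
  // fixup_7_pcrel: a 7-bit field at bit offset 3 inside a 16-bit BRcc encoding.
  std::vector<uint8_t> Insn = {0x01, 0xf0};  // BREQ skeleton, offset bits zero
  orInFixup(Insn, /*Offset=*/0, /*Value=*/5, /*TargetOffset=*/3, /*TargetSize=*/7);
  assert(Insn[0] == (0x01 | (5 << 3)) && Insn[1] == 0xf0);
}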
// // name offset bits flags {"fixup_32", 0, 32, 0}, {"fixup_7_pcrel", 3, 7, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_13_pcrel", 0, 12, MCFixupKindInfo::FKF_IsPCRel}, {"fixup_16", 0, 16, 0}, {"fixup_16_pm", 0, 16, 0}, {"fixup_ldi", 0, 8, 0}, {"fixup_lo8_ldi", 0, 8, 0}, {"fixup_hi8_ldi", 0, 8, 0}, {"fixup_hh8_ldi", 0, 8, 0}, {"fixup_ms8_ldi", 0, 8, 0}, {"fixup_lo8_ldi_neg", 0, 8, 0}, {"fixup_hi8_ldi_neg", 0, 8, 0}, {"fixup_hh8_ldi_neg", 0, 8, 0}, {"fixup_ms8_ldi_neg", 0, 8, 0}, {"fixup_lo8_ldi_pm", 0, 8, 0}, {"fixup_hi8_ldi_pm", 0, 8, 0}, {"fixup_hh8_ldi_pm", 0, 8, 0}, {"fixup_lo8_ldi_pm_neg", 0, 8, 0}, {"fixup_hi8_ldi_pm_neg", 0, 8, 0}, {"fixup_hh8_ldi_pm_neg", 0, 8, 0}, {"fixup_call", 0, 22, 0}, {"fixup_6", 0, 16, 0}, // non-contiguous {"fixup_6_adiw", 0, 6, 0}, {"fixup_lo8_ldi_gs", 0, 8, 0}, {"fixup_hi8_ldi_gs", 0, 8, 0}, {"fixup_8", 0, 8, 0}, {"fixup_8_lo8", 0, 8, 0}, {"fixup_8_hi8", 0, 8, 0}, {"fixup_8_hlo8", 0, 8, 0}, {"fixup_diff8", 0, 8, 0}, {"fixup_diff16", 0, 16, 0}, {"fixup_diff32", 0, 32, 0}, {"fixup_lds_sts_16", 0, 16, 0}, {"fixup_port6", 0, 16, 0}, // non-contiguous {"fixup_port5", 3, 5, 0}, }; // Fixup kinds from .reloc directive are like R_AVR_NONE. They do not require // any extra processing. if (Kind >= FirstLiteralRelocationKind) return MCAsmBackend::getFixupKindInfo(FK_NONE); if (Kind < FirstTargetFixupKind) return MCAsmBackend::getFixupKindInfo(Kind); assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && "Invalid kind!"); return Infos[Kind - FirstTargetFixupKind]; } bool AVRAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count, const MCSubtargetInfo *STI) const { // If the count is not 2-byte aligned, we must be writing data into the text // section (otherwise we have unaligned instructions, and thus have far // bigger problems), so just write zeros instead. assert((Count % 2) == 0 && "NOP instructions must be 2 bytes"); OS.write_zeros(Count); return true; } bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, const MCSubtargetInfo *STI) { switch ((unsigned)Fixup.getKind()) { default: return Fixup.getKind() >= FirstLiteralRelocationKind; case AVR::fixup_7_pcrel: case AVR::fixup_13_pcrel: // Always resolve relocations for PC-relative branches return false; case AVR::fixup_call: return true; } } MCAsmBackend *createAVRAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, const llvm::MCTargetOptions &TO) { return new AVRAsmBackend(STI.getTargetTriple().getOS()); } } // end of namespace llvm diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 23539a5f4b26..ac9e6d5a90cb 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -1,657 +1,653 @@ //===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// /// /// \file /// This file defines the WebAssembly-specific subclass of TargetMachine. 
/// //===----------------------------------------------------------------------===// #include "WebAssemblyTargetMachine.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "TargetInfo/WebAssemblyTargetInfo.h" #include "WebAssembly.h" #include "WebAssemblyISelLowering.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblyTargetObjectFile.h" #include "WebAssemblyTargetTransformInfo.h" #include "WebAssemblyUtilities.h" #include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/InitializePasses.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LowerAtomicPass.h" #include "llvm/Transforms/Utils.h" #include using namespace llvm; #define DEBUG_TYPE "wasm" // A command-line option to keep implicit locals // for the purpose of testing with lit/llc ONLY. // This produces output which is not valid WebAssembly, and is not supported // by assemblers/disassemblers and other MC based tools. static cl::opt WasmDisableExplicitLocals( "wasm-disable-explicit-locals", cl::Hidden, cl::desc("WebAssembly: output implicit locals in" " instruction output for test purposes only."), cl::init(false)); static cl::opt WasmDisableFixIrreducibleControlFlowPass( "wasm-disable-fix-irreducible-control-flow-pass", cl::Hidden, cl::desc("webassembly: disables the fix " " irreducible control flow optimization pass"), cl::init(false)); extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() { // Register the target. RegisterTargetMachine X( getTheWebAssemblyTarget32()); RegisterTargetMachine Y( getTheWebAssemblyTarget64()); // Register backend passes auto &PR = *PassRegistry::getPassRegistry(); initializeWebAssemblyAddMissingPrototypesPass(PR); initializeWebAssemblyLowerEmscriptenEHSjLjPass(PR); initializeLowerGlobalDtorsLegacyPassPass(PR); initializeFixFunctionBitcastsPass(PR); initializeOptimizeReturnedPass(PR); initializeWebAssemblyRefTypeMem2LocalPass(PR); initializeWebAssemblyArgumentMovePass(PR); initializeWebAssemblySetP2AlignOperandsPass(PR); initializeWebAssemblyReplacePhysRegsPass(PR); initializeWebAssemblyOptimizeLiveIntervalsPass(PR); initializeWebAssemblyMemIntrinsicResultsPass(PR); initializeWebAssemblyRegStackifyPass(PR); initializeWebAssemblyRegColoringPass(PR); initializeWebAssemblyNullifyDebugValueListsPass(PR); initializeWebAssemblyFixIrreducibleControlFlowPass(PR); initializeWebAssemblyLateEHPreparePass(PR); initializeWebAssemblyExceptionInfoPass(PR); initializeWebAssemblyCFGSortPass(PR); initializeWebAssemblyCFGStackifyPass(PR); initializeWebAssemblyExplicitLocalsPass(PR); initializeWebAssemblyLowerBrUnlessPass(PR); initializeWebAssemblyRegNumberingPass(PR); initializeWebAssemblyDebugFixupPass(PR); initializeWebAssemblyPeepholePass(PR); initializeWebAssemblyMCLowerPrePassPass(PR); initializeWebAssemblyLowerRefTypesIntPtrConvPass(PR); initializeWebAssemblyFixBrTableDefaultsPass(PR); initializeWebAssemblyDAGToDAGISelLegacyPass(PR); } //===----------------------------------------------------------------------===// // WebAssembly Lowering public interface. 
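// LLVMInitializeWebAssemblyTarget() above uses the standard registration
// idiom: one RegisterTargetMachine object per registered Target, plus one
// initialize*Pass() call per backend pass so the legacy PassRegistry knows
// about them. A sketch of that idiom with its template parameter written out;
// the include list is an assumption, and the pass list is abbreviated:

#include "WebAssembly.h"
#include "WebAssemblyTargetMachine.h"
#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/Compiler.h"

using namespace llvm;

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeWebAssemblyTarget() {
  // Bind both the wasm32 and wasm64 Target singletons to the same
  // TargetMachine subclass.
  RegisterTargetMachine<WebAssemblyTargetMachine> X(getTheWebAssemblyTarget32());
  RegisterTargetMachine<WebAssemblyTargetMachine> Y(getTheWebAssemblyTarget64());

  PassRegistry &PR = *PassRegistry::getPassRegistry();
  initializeWebAssemblyAddMissingPrototypesPass(PR);
  // ... one initialize*Pass(PR) call per backend pass, as listed above ...
}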
//===----------------------------------------------------------------------===// static Reloc::Model getEffectiveRelocModel(std::optional RM, const Triple &TT) { if (!RM) { // Default to static relocation model. This should always be more optimial // than PIC since the static linker can determine all global addresses and // assume direct function calls. return Reloc::Static; } return *RM; } /// Create an WebAssembly architecture model. /// WebAssemblyTargetMachine::WebAssemblyTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, std::optional RM, std::optional CM, CodeGenOptLevel OL, bool JIT) : LLVMTargetMachine( T, TT.isArch64Bit() ? (TT.isOSEmscripten() ? "e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-" "f128:64-n32:64-S128-ni:1:10:20" : "e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-" "n32:64-S128-ni:1:10:20") : (TT.isOSEmscripten() ? "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-" "f128:64-n32:64-S128-ni:1:10:20" : "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-" "n32:64-S128-ni:1:10:20"), TT, CPU, FS, Options, getEffectiveRelocModel(RM, TT), getEffectiveCodeModel(CM, CodeModel::Large), OL), TLOF(new WebAssemblyTargetObjectFile()), UsesMultivalueABI(Options.MCOptions.getABIName() == "experimental-mv") { // WebAssembly type-checks instructions, but a noreturn function with a return // type that doesn't match the context will cause a check failure. So we lower // LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's // 'unreachable' instructions which is meant for that case. this->Options.TrapUnreachable = true; this->Options.NoTrapAfterNoreturn = false; // WebAssembly treats each function as an independent unit. Force // -ffunction-sections, effectively, so that we can emit them independently. this->Options.FunctionSections = true; this->Options.DataSections = true; this->Options.UniqueSectionNames = true; initAsmInfo(); // Note that we don't use setRequiresStructuredCFG(true). It disables // optimizations than we're ok with, and want, such as critical edge // splitting and tail merging. } WebAssemblyTargetMachine::~WebAssemblyTargetMachine() = default; // anchor. const WebAssemblySubtarget *WebAssemblyTargetMachine::getSubtargetImpl() const { return getSubtargetImpl(std::string(getTargetCPU()), std::string(getTargetFeatureString())); } const WebAssemblySubtarget * WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU, std::string FS) const { auto &I = SubtargetMap[CPU + FS]; if (!I) { I = std::make_unique(TargetTriple, CPU, FS, *this); } return I.get(); } const WebAssemblySubtarget * WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); std::string CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; std::string FS = FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; // This needs to be done before we create a new subtarget since any // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. resetTargetOptions(F); return getSubtargetImpl(CPU, FS); } namespace { class CoalesceFeaturesAndStripAtomics final : public ModulePass { // Take the union of all features used in the module and use it for each // function individually, since having multiple feature sets in one module // currently does not make sense for WebAssembly. If atomics are not enabled, // also strip atomic operations and thread local storage. 
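// The getFeatureString() change further down in this hunk makes the coalesced
// feature string fully explicit: every feature known to the target now appears
// either as "+name" or as "-name", instead of copying whatever "-name" entries
// happened to be in the old target feature string. A minimal sketch of the new
// behaviour, with plain STL containers standing in for FeatureBitset and
// WebAssemblyFeatureKV:

#include <cassert>
#include <set>
#include <string>
#include <vector>

static std::string coalescedFeatureString(
    const std::vector<std::string> &KnownFeatures, // stand-in for WebAssemblyFeatureKV
    const std::set<std::string> &Enabled) {        // stand-in for the coalesced bitset
  std::string Ret;
  for (const std::string &Name : KnownFeatures)
    Ret += (Enabled.count(Name) ? "+" : "-") + Name + ",";
  return Ret;
}

int main() {
  assert(coalescedFeatureString({"atomics", "bulk-memory", "simd128"},
                                {"simd128"}) == "-atomics,-bulk-memory,+simd128,");
}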
static char ID; WebAssemblyTargetMachine *WasmTM; public: CoalesceFeaturesAndStripAtomics(WebAssemblyTargetMachine *WasmTM) : ModulePass(ID), WasmTM(WasmTM) {} bool runOnModule(Module &M) override { FeatureBitset Features = coalesceFeatures(M); - std::string FeatureStr = - getFeatureString(Features, WasmTM->getTargetFeatureString()); + std::string FeatureStr = getFeatureString(Features); WasmTM->setTargetFeatureString(FeatureStr); for (auto &F : M) replaceFeatures(F, FeatureStr); bool StrippedAtomics = false; bool StrippedTLS = false; if (!Features[WebAssembly::FeatureAtomics]) { StrippedAtomics = stripAtomics(M); StrippedTLS = stripThreadLocals(M); } else if (!Features[WebAssembly::FeatureBulkMemory]) { StrippedTLS |= stripThreadLocals(M); } if (StrippedAtomics && !StrippedTLS) stripThreadLocals(M); else if (StrippedTLS && !StrippedAtomics) stripAtomics(M); recordFeatures(M, Features, StrippedAtomics || StrippedTLS); // Conservatively assume we have made some change return true; } private: FeatureBitset coalesceFeatures(const Module &M) { FeatureBitset Features = WasmTM ->getSubtargetImpl(std::string(WasmTM->getTargetCPU()), std::string(WasmTM->getTargetFeatureString())) ->getFeatureBits(); for (auto &F : M) Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits(); return Features; } - static std::string getFeatureString(const FeatureBitset &Features, - StringRef TargetFS) { + static std::string getFeatureString(const FeatureBitset &Features) { std::string Ret; for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) { if (Features[KV.Value]) Ret += (StringRef("+") + KV.Key + ",").str(); + else + Ret += (StringRef("-") + KV.Key + ",").str(); } - SubtargetFeatures TF{TargetFS}; - for (std::string const &F : TF.getFeatures()) - if (!SubtargetFeatures::isEnabled(F)) - Ret += F + ","; return Ret; } void replaceFeatures(Function &F, const std::string &Features) { F.removeFnAttr("target-features"); F.removeFnAttr("target-cpu"); F.addFnAttr("target-features", Features); } bool stripAtomics(Module &M) { // Detect whether any atomics will be lowered, since there is no way to tell // whether the LowerAtomic pass lowers e.g. stores. bool Stripped = false; for (auto &F : M) { for (auto &B : F) { for (auto &I : B) { if (I.isAtomic()) { Stripped = true; goto done; } } } } done: if (!Stripped) return false; LowerAtomicPass Lowerer; FunctionAnalysisManager FAM; for (auto &F : M) Lowerer.run(F, FAM); return true; } bool stripThreadLocals(Module &M) { bool Stripped = false; for (auto &GV : M.globals()) { if (GV.isThreadLocal()) { // replace `@llvm.threadlocal.address.pX(GV)` with `GV`. for (Use &U : make_early_inc_range(GV.uses())) { if (IntrinsicInst *II = dyn_cast(U.getUser())) { if (II->getIntrinsicID() == Intrinsic::threadlocal_address && II->getArgOperand(0) == &GV) { II->replaceAllUsesWith(&GV); II->eraseFromParent(); } } } Stripped = true; GV.setThreadLocal(false); } } return Stripped; } void recordFeatures(Module &M, const FeatureBitset &Features, bool Stripped) { for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) { if (Features[KV.Value]) { // Mark features as used std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str(); M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey, wasm::WASM_FEATURE_PREFIX_USED); } } // Code compiled without atomics or bulk-memory may have had its atomics or // thread-local data lowered to nonatomic operations or non-thread-local // data. 
In that case, we mark the pseudo-feature "shared-mem" as disallowed // to tell the linker that it would be unsafe to allow this code ot be used // in a module with shared memory. if (Stripped) { M.addModuleFlag(Module::ModFlagBehavior::Error, "wasm-feature-shared-mem", wasm::WASM_FEATURE_PREFIX_DISALLOWED); } } }; char CoalesceFeaturesAndStripAtomics::ID = 0; /// WebAssembly Code Generator Pass Configuration Options. class WebAssemblyPassConfig final : public TargetPassConfig { public: WebAssemblyPassConfig(WebAssemblyTargetMachine &TM, PassManagerBase &PM) : TargetPassConfig(TM, PM) {} WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const { return getTM(); } FunctionPass *createTargetRegisterAllocator(bool) override; void addIRPasses() override; void addISelPrepare() override; bool addInstSelector() override; void addOptimizedRegAlloc() override; void addPostRegAlloc() override; bool addGCPasses() override { return false; } void addPreEmitPass() override; bool addPreISel() override; // No reg alloc bool addRegAssignAndRewriteFast() override { return false; } // No reg alloc bool addRegAssignAndRewriteOptimized() override { return false; } }; } // end anonymous namespace MachineFunctionInfo *WebAssemblyTargetMachine::createMachineFunctionInfo( BumpPtrAllocator &Allocator, const Function &F, const TargetSubtargetInfo *STI) const { return WebAssemblyFunctionInfo::create(Allocator, F, STI); } TargetTransformInfo WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(WebAssemblyTTIImpl(this, F)); } TargetPassConfig * WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) { return new WebAssemblyPassConfig(*this, PM); } FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { return nullptr; // No reg alloc } using WebAssembly::WasmEnableEH; using WebAssembly::WasmEnableEmEH; using WebAssembly::WasmEnableEmSjLj; using WebAssembly::WasmEnableExnref; using WebAssembly::WasmEnableSjLj; static void basicCheckForEHAndSjLj(TargetMachine *TM) { // You can't enable two modes of EH at the same time if (WasmEnableEmEH && WasmEnableEH) report_fatal_error( "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh"); // You can't enable two modes of SjLj at the same time if (WasmEnableEmSjLj && WasmEnableSjLj) report_fatal_error( "-enable-emscripten-sjlj not allowed with -wasm-enable-sjlj"); // You can't mix Emscripten EH with Wasm SjLj. if (WasmEnableEmEH && WasmEnableSjLj) report_fatal_error( "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj"); if (WasmEnableExnref && !WasmEnableEH) report_fatal_error( "-wasm-enable-exnref should be used with -wasm-enable-eh"); // Here we make sure TargetOptions.ExceptionModel is the same as // MCAsmInfo.ExceptionsType. Normally these have to be the same, because clang // stores the exception model info in LangOptions, which is later transferred // to TargetOptions and MCAsmInfo. But when clang compiles bitcode directly, // clang's LangOptions is not used and thus the exception model info is not // correctly transferred to TargetOptions and MCAsmInfo, so we make sure we // have the correct exception model in WebAssemblyMCAsmInfo constructor. But // in this case TargetOptions is still not updated, so we make sure they are // the same. 
TM->Options.ExceptionModel = TM->getMCAsmInfo()->getExceptionHandlingType(); // Basic Correctness checking related to -exception-model if (TM->Options.ExceptionModel != ExceptionHandling::None && TM->Options.ExceptionModel != ExceptionHandling::Wasm) report_fatal_error("-exception-model should be either 'none' or 'wasm'"); if (WasmEnableEmEH && TM->Options.ExceptionModel == ExceptionHandling::Wasm) report_fatal_error("-exception-model=wasm not allowed with " "-enable-emscripten-cxx-exceptions"); if (WasmEnableEH && TM->Options.ExceptionModel != ExceptionHandling::Wasm) report_fatal_error( "-wasm-enable-eh only allowed with -exception-model=wasm"); if (WasmEnableSjLj && TM->Options.ExceptionModel != ExceptionHandling::Wasm) report_fatal_error( "-wasm-enable-sjlj only allowed with -exception-model=wasm"); if ((!WasmEnableEH && !WasmEnableSjLj) && TM->Options.ExceptionModel == ExceptionHandling::Wasm) report_fatal_error( "-exception-model=wasm only allowed with at least one of " "-wasm-enable-eh or -wasm-enable-sjlj"); // Currently it is allowed to mix Wasm EH with Emscripten SjLj as an interim // measure, but some code will error out at compile time in this combination. // See WebAssemblyLowerEmscriptenEHSjLj pass for details. } //===----------------------------------------------------------------------===// // The following functions are called from lib/CodeGen/Passes.cpp to modify // the CodeGen pass sequence. //===----------------------------------------------------------------------===// void WebAssemblyPassConfig::addIRPasses() { // Add signatures to prototype-less function declarations addPass(createWebAssemblyAddMissingPrototypes()); // Lower .llvm.global_dtors into .llvm.global_ctors with __cxa_atexit calls. addPass(createLowerGlobalDtorsLegacyPass()); // Fix function bitcasts, as WebAssembly requires caller and callee signatures // to match. addPass(createWebAssemblyFixFunctionBitcasts()); // Optimize "returned" function attributes. if (getOptLevel() != CodeGenOptLevel::None) addPass(createWebAssemblyOptimizeReturned()); basicCheckForEHAndSjLj(TM); // If exception handling is not enabled and setjmp/longjmp handling is // enabled, we lower invokes into calls and delete unreachable landingpad // blocks. Lowering invokes when there is no EH support is done in // TargetPassConfig::addPassesToHandleExceptions, but that runs after these IR // passes and Emscripten SjLj handling expects all invokes to be lowered // before. if (!WasmEnableEmEH && !WasmEnableEH) { addPass(createLowerInvokePass()); // The lower invoke pass may create unreachable code. Remove it in order not // to process dead blocks in setjmp/longjmp handling. addPass(createUnreachableBlockEliminationPass()); } // Handle exceptions and setjmp/longjmp if enabled. Unlike Wasm EH preparation // done in WasmEHPrepare pass, Wasm SjLj preparation shares libraries and // transformation algorithms with Emscripten SjLj, so we run // LowerEmscriptenEHSjLj pass also when Wasm SjLj is enabled. if (WasmEnableEmEH || WasmEnableEmSjLj || WasmEnableSjLj) addPass(createWebAssemblyLowerEmscriptenEHSjLj()); // Expand indirectbr instructions to switches. addPass(createIndirectBrExpandPass()); TargetPassConfig::addIRPasses(); } void WebAssemblyPassConfig::addISelPrepare() { // We need to move reference type allocas to WASM_ADDRESS_SPACE_VAR so that // loads and stores are promoted to local.gets/local.sets. 
addPass(createWebAssemblyRefTypeMem2Local()); // Lower atomics and TLS if necessary addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine())); // This is a no-op if atomics are not used in the module addPass(createAtomicExpandLegacyPass()); TargetPassConfig::addISelPrepare(); } bool WebAssemblyPassConfig::addInstSelector() { (void)TargetPassConfig::addInstSelector(); addPass( createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel())); // Run the argument-move pass immediately after the ScheduleDAG scheduler // so that we can fix up the ARGUMENT instructions before anything else // sees them in the wrong place. addPass(createWebAssemblyArgumentMove()); // Set the p2align operands. This information is present during ISel, however // it's inconvenient to collect. Collect it now, and update the immediate // operands. addPass(createWebAssemblySetP2AlignOperands()); // Eliminate range checks and add default targets to br_table instructions. addPass(createWebAssemblyFixBrTableDefaults()); // unreachable is terminator, non-terminator instruction after it is not // allowed. addPass(createWebAssemblyCleanCodeAfterTrap()); return false; } void WebAssemblyPassConfig::addOptimizedRegAlloc() { // Currently RegisterCoalesce degrades wasm debug info quality by a // significant margin. As a quick fix, disable this for -O1, which is often // used for debugging large applications. Disabling this increases code size // of Emscripten core benchmarks by ~5%, which is acceptable for -O1, which is // usually not used for production builds. // TODO Investigate why RegisterCoalesce degrades debug info quality and fix // it properly if (getOptLevel() == CodeGenOptLevel::Less) disablePass(&RegisterCoalescerID); TargetPassConfig::addOptimizedRegAlloc(); } void WebAssemblyPassConfig::addPostRegAlloc() { // TODO: The following CodeGen passes don't currently support code containing // virtual registers. Consider removing their restrictions and re-enabling // them. // These functions all require the NoVRegs property. disablePass(&MachineLateInstrsCleanupID); disablePass(&MachineCopyPropagationID); disablePass(&PostRAMachineSinkingID); disablePass(&PostRASchedulerID); disablePass(&FuncletLayoutID); disablePass(&StackMapLivenessID); disablePass(&PatchableFunctionID); disablePass(&ShrinkWrapID); // This pass hurts code size for wasm because it can generate irreducible // control flow. disablePass(&MachineBlockPlacementID); TargetPassConfig::addPostRegAlloc(); } void WebAssemblyPassConfig::addPreEmitPass() { TargetPassConfig::addPreEmitPass(); // Nullify DBG_VALUE_LISTs that we cannot handle. addPass(createWebAssemblyNullifyDebugValueLists()); // Eliminate multiple-entry loops. if (!WasmDisableFixIrreducibleControlFlowPass) addPass(createWebAssemblyFixIrreducibleControlFlow()); // Do various transformations for exception handling. // Every CFG-changing optimizations should come before this. if (TM->Options.ExceptionModel == ExceptionHandling::Wasm) addPass(createWebAssemblyLateEHPrepare()); // Now that we have a prologue and epilogue and all frame indices are // rewritten, eliminate SP and FP. This allows them to be stackified, // colored, and numbered with the rest of the registers. addPass(createWebAssemblyReplacePhysRegs()); // Preparations and optimizations related to register stackification. if (getOptLevel() != CodeGenOptLevel::None) { // Depend on LiveIntervals and perform some optimizations on it. 
addPass(createWebAssemblyOptimizeLiveIntervals()); // Prepare memory intrinsic calls for register stackifying. addPass(createWebAssemblyMemIntrinsicResults()); // Mark registers as representing wasm's value stack. This is a key // code-compression technique in WebAssembly. We run this pass (and // MemIntrinsicResults above) very late, so that it sees as much code as // possible, including code emitted by PEI and expanded by late tail // duplication. addPass(createWebAssemblyRegStackify()); // Run the register coloring pass to reduce the total number of registers. // This runs after stackification so that it doesn't consider registers // that become stackified. addPass(createWebAssemblyRegColoring()); } // Sort the blocks of the CFG into topological order, a prerequisite for // BLOCK and LOOP markers. addPass(createWebAssemblyCFGSort()); // Insert BLOCK and LOOP markers. addPass(createWebAssemblyCFGStackify()); // Insert explicit local.get and local.set operators. if (!WasmDisableExplicitLocals) addPass(createWebAssemblyExplicitLocals()); // Lower br_unless into br_if. addPass(createWebAssemblyLowerBrUnless()); // Perform the very last peephole optimizations on the code. if (getOptLevel() != CodeGenOptLevel::None) addPass(createWebAssemblyPeephole()); // Create a mapping from LLVM CodeGen virtual registers to wasm registers. addPass(createWebAssemblyRegNumbering()); // Fix debug_values whose defs have been stackified. if (!WasmDisableExplicitLocals) addPass(createWebAssemblyDebugFixup()); // Collect information to prepare for MC lowering / asm printing. addPass(createWebAssemblyMCLowerPrePass()); } bool WebAssemblyPassConfig::addPreISel() { TargetPassConfig::addPreISel(); addPass(createWebAssemblyLowerRefTypesIntPtrConv()); return false; } yaml::MachineFunctionInfo * WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const { return new yaml::WebAssemblyFunctionInfo(); } yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML( const MachineFunction &MF) const { const auto *MFI = MF.getInfo(); return new yaml::WebAssemblyFunctionInfo(MF, *MFI); } bool WebAssemblyTargetMachine::parseMachineFunctionInfo( const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const { const auto &YamlMFI = static_cast(MFI); MachineFunction &MF = PFS.MF; MF.getInfo()->initializeBaseYamlFields(MF, YamlMFI); return false; } diff --git a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp index 9d9917d86a36..5e9dd94b84b2 100644 --- a/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp +++ b/contrib/llvm-project/llvm/lib/TargetParser/ARMTargetParser.cpp @@ -1,661 +1,664 @@ //===-- ARMTargetParser - Parser for ARM target features --------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements a target parser to recognise ARM hardware features // such as FPU/CPU/ARCH/extensions and specific support such as HWDIV. 
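// The entry points implemented in this file are what clang's driver uses to
// turn -march/-mcpu spellings into ArchKind values and CPU defaults. A small
// usage sketch; the expected values follow directly from the tables and
// switches in this file:

#include "llvm/TargetParser/ARMTargetParser.h"
#include <cassert>

int main() {
  using namespace llvm;
  assert(ARM::parseArch("armv7-a") == ARM::ArchKind::ARMV7A);
  // Partial spellings such as "v7-a" are also accepted (suffix match).
  assert(ARM::parseArchVersion("armv7-a") == 7);
  assert(ARM::parseArchProfile("armv8-m.main") == ARM::ProfileKind::M);
  assert(ARM::parseCPUArch("cortex-m4") == ARM::ArchKind::ARMV7EM);
}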
// //===----------------------------------------------------------------------===// #include "llvm/TargetParser/ARMTargetParser.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/ARMTargetParserCommon.h" #include "llvm/TargetParser/Triple.h" #include using namespace llvm; static StringRef getHWDivSynonym(StringRef HWDiv) { return StringSwitch(HWDiv) .Case("thumb,arm", "arm,thumb") .Default(HWDiv); } // Allows partial match, ex. "v7a" matches "armv7a". ARM::ArchKind ARM::parseArch(StringRef Arch) { Arch = getCanonicalArchName(Arch); StringRef Syn = getArchSynonym(Arch); for (const auto &A : ARMArchNames) { if (A.Name.ends_with(Syn)) return A.ID; } return ArchKind::INVALID; } // Version number (ex. v7 = 7). unsigned ARM::parseArchVersion(StringRef Arch) { Arch = getCanonicalArchName(Arch); switch (parseArch(Arch)) { case ArchKind::ARMV4: case ArchKind::ARMV4T: return 4; case ArchKind::ARMV5T: case ArchKind::ARMV5TE: case ArchKind::IWMMXT: case ArchKind::IWMMXT2: case ArchKind::XSCALE: case ArchKind::ARMV5TEJ: return 5; case ArchKind::ARMV6: case ArchKind::ARMV6K: case ArchKind::ARMV6T2: case ArchKind::ARMV6KZ: case ArchKind::ARMV6M: return 6; case ArchKind::ARMV7A: case ArchKind::ARMV7VE: case ArchKind::ARMV7R: case ArchKind::ARMV7M: case ArchKind::ARMV7S: case ArchKind::ARMV7EM: case ArchKind::ARMV7K: return 7; case ArchKind::ARMV8A: case ArchKind::ARMV8_1A: case ArchKind::ARMV8_2A: case ArchKind::ARMV8_3A: case ArchKind::ARMV8_4A: case ArchKind::ARMV8_5A: case ArchKind::ARMV8_6A: case ArchKind::ARMV8_7A: case ArchKind::ARMV8_8A: case ArchKind::ARMV8_9A: case ArchKind::ARMV8R: case ArchKind::ARMV8MBaseline: case ArchKind::ARMV8MMainline: case ArchKind::ARMV8_1MMainline: return 8; case ArchKind::ARMV9A: case ArchKind::ARMV9_1A: case ArchKind::ARMV9_2A: case ArchKind::ARMV9_3A: case ArchKind::ARMV9_4A: case ArchKind::ARMV9_5A: return 9; case ArchKind::INVALID: return 0; } llvm_unreachable("Unhandled architecture"); } static ARM::ProfileKind getProfileKind(ARM::ArchKind AK) { switch (AK) { case ARM::ArchKind::ARMV6M: case ARM::ArchKind::ARMV7M: case ARM::ArchKind::ARMV7EM: case ARM::ArchKind::ARMV8MMainline: case ARM::ArchKind::ARMV8MBaseline: case ARM::ArchKind::ARMV8_1MMainline: return ARM::ProfileKind::M; case ARM::ArchKind::ARMV7R: case ARM::ArchKind::ARMV8R: return ARM::ProfileKind::R; case ARM::ArchKind::ARMV7A: case ARM::ArchKind::ARMV7VE: case ARM::ArchKind::ARMV7K: case ARM::ArchKind::ARMV8A: case ARM::ArchKind::ARMV8_1A: case ARM::ArchKind::ARMV8_2A: case ARM::ArchKind::ARMV8_3A: case ARM::ArchKind::ARMV8_4A: case ARM::ArchKind::ARMV8_5A: case ARM::ArchKind::ARMV8_6A: case ARM::ArchKind::ARMV8_7A: case ARM::ArchKind::ARMV8_8A: case ARM::ArchKind::ARMV8_9A: case ARM::ArchKind::ARMV9A: case ARM::ArchKind::ARMV9_1A: case ARM::ArchKind::ARMV9_2A: case ARM::ArchKind::ARMV9_3A: case ARM::ArchKind::ARMV9_4A: case ARM::ArchKind::ARMV9_5A: return ARM::ProfileKind::A; case ARM::ArchKind::ARMV4: case ARM::ArchKind::ARMV4T: case ARM::ArchKind::ARMV5T: case ARM::ArchKind::ARMV5TE: case ARM::ArchKind::ARMV5TEJ: case ARM::ArchKind::ARMV6: case ARM::ArchKind::ARMV6K: case ARM::ArchKind::ARMV6T2: case ARM::ArchKind::ARMV6KZ: case ARM::ArchKind::ARMV7S: case ARM::ArchKind::IWMMXT: case ARM::ArchKind::IWMMXT2: case ARM::ArchKind::XSCALE: case ARM::ArchKind::INVALID: return ARM::ProfileKind::INVALID; } llvm_unreachable("Unhandled architecture"); } // Profile A/R/M ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) 
{ Arch = getCanonicalArchName(Arch); return getProfileKind(parseArch(Arch)); } bool ARM::getFPUFeatures(ARM::FPUKind FPUKind, std::vector &Features) { if (FPUKind >= FK_LAST || FPUKind == FK_INVALID) return false; static const struct FPUFeatureNameInfo { const char *PlusName, *MinusName; FPUVersion MinVersion; FPURestriction MaxRestriction; } FPUFeatureInfoList[] = { // We have to specify the + and - versions of the name in full so // that we can return them as static StringRefs. // // Also, the SubtargetFeatures ending in just "sp" are listed here // under FPURestriction::None, which is the only FPURestriction in // which they would be valid (since FPURestriction::SP doesn't // exist). {"+vfp2", "-vfp2", FPUVersion::VFPV2, FPURestriction::D16}, {"+vfp2sp", "-vfp2sp", FPUVersion::VFPV2, FPURestriction::SP_D16}, {"+vfp3", "-vfp3", FPUVersion::VFPV3, FPURestriction::None}, {"+vfp3d16", "-vfp3d16", FPUVersion::VFPV3, FPURestriction::D16}, {"+vfp3d16sp", "-vfp3d16sp", FPUVersion::VFPV3, FPURestriction::SP_D16}, {"+vfp3sp", "-vfp3sp", FPUVersion::VFPV3, FPURestriction::None}, {"+fp16", "-fp16", FPUVersion::VFPV3_FP16, FPURestriction::SP_D16}, {"+vfp4", "-vfp4", FPUVersion::VFPV4, FPURestriction::None}, {"+vfp4d16", "-vfp4d16", FPUVersion::VFPV4, FPURestriction::D16}, {"+vfp4d16sp", "-vfp4d16sp", FPUVersion::VFPV4, FPURestriction::SP_D16}, {"+vfp4sp", "-vfp4sp", FPUVersion::VFPV4, FPURestriction::None}, {"+fp-armv8", "-fp-armv8", FPUVersion::VFPV5, FPURestriction::None}, {"+fp-armv8d16", "-fp-armv8d16", FPUVersion::VFPV5, FPURestriction::D16}, {"+fp-armv8d16sp", "-fp-armv8d16sp", FPUVersion::VFPV5, FPURestriction::SP_D16}, {"+fp-armv8sp", "-fp-armv8sp", FPUVersion::VFPV5, FPURestriction::None}, {"+fullfp16", "-fullfp16", FPUVersion::VFPV5_FULLFP16, FPURestriction::SP_D16}, {"+fp64", "-fp64", FPUVersion::VFPV2, FPURestriction::D16}, {"+d32", "-d32", FPUVersion::VFPV3, FPURestriction::None}, }; for (const auto &Info: FPUFeatureInfoList) { if (FPUNames[FPUKind].FPUVer >= Info.MinVersion && FPUNames[FPUKind].Restriction <= Info.MaxRestriction) Features.push_back(Info.PlusName); else Features.push_back(Info.MinusName); } static const struct NeonFeatureNameInfo { const char *PlusName, *MinusName; NeonSupportLevel MinSupportLevel; } NeonFeatureInfoList[] = { {"+neon", "-neon", NeonSupportLevel::Neon}, {"+sha2", "-sha2", NeonSupportLevel::Crypto}, {"+aes", "-aes", NeonSupportLevel::Crypto}, }; for (const auto &Info: NeonFeatureInfoList) { if (FPUNames[FPUKind].NeonSupport >= Info.MinSupportLevel) Features.push_back(Info.PlusName); else Features.push_back(Info.MinusName); } return true; } ARM::FPUKind ARM::parseFPU(StringRef FPU) { StringRef Syn = getFPUSynonym(FPU); for (const auto &F : FPUNames) { if (Syn == F.Name) return F.ID; } return FK_INVALID; } ARM::NeonSupportLevel ARM::getFPUNeonSupportLevel(ARM::FPUKind FPUKind) { if (FPUKind >= FK_LAST) return NeonSupportLevel::None; return FPUNames[FPUKind].NeonSupport; } StringRef ARM::getFPUSynonym(StringRef FPU) { return StringSwitch(FPU) .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported .Case("vfp2", "vfpv2") .Case("vfp3", "vfpv3") .Case("vfp4", "vfpv4") .Case("vfp3-d16", "vfpv3-d16") .Case("vfp4-d16", "vfpv4-d16") .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16") .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16") .Case("fp5-sp-d16", "fpv5-sp-d16") .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16") // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3. 
.Case("neon-vfpv3", "neon") .Default(FPU); } StringRef ARM::getFPUName(ARM::FPUKind FPUKind) { if (FPUKind >= FK_LAST) return StringRef(); return FPUNames[FPUKind].Name; } ARM::FPUVersion ARM::getFPUVersion(ARM::FPUKind FPUKind) { if (FPUKind >= FK_LAST) return FPUVersion::NONE; return FPUNames[FPUKind].FPUVer; } ARM::FPURestriction ARM::getFPURestriction(ARM::FPUKind FPUKind) { if (FPUKind >= FK_LAST) return FPURestriction::None; return FPUNames[FPUKind].Restriction; } ARM::FPUKind ARM::getDefaultFPU(StringRef CPU, ARM::ArchKind AK) { if (CPU == "generic") return ARM::ARMArchNames[static_cast(AK)].DefaultFPU; return StringSwitch(CPU) #define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ .Case(NAME, DEFAULT_FPU) #include "llvm/TargetParser/ARMTargetParser.def" .Default(ARM::FK_INVALID); } uint64_t ARM::getDefaultExtensions(StringRef CPU, ARM::ArchKind AK) { if (CPU == "generic") return ARM::ARMArchNames[static_cast(AK)].ArchBaseExtensions; return StringSwitch(CPU) #define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ .Case(NAME, \ ARMArchNames[static_cast(ArchKind::ID)].ArchBaseExtensions | \ DEFAULT_EXT) #include "llvm/TargetParser/ARMTargetParser.def" .Default(ARM::AEK_INVALID); } bool ARM::getHWDivFeatures(uint64_t HWDivKind, std::vector &Features) { if (HWDivKind == AEK_INVALID) return false; if (HWDivKind & AEK_HWDIVARM) Features.push_back("+hwdiv-arm"); else Features.push_back("-hwdiv-arm"); if (HWDivKind & AEK_HWDIVTHUMB) Features.push_back("+hwdiv"); else Features.push_back("-hwdiv"); return true; } bool ARM::getExtensionFeatures(uint64_t Extensions, std::vector &Features) { if (Extensions == AEK_INVALID) return false; for (const auto &AE : ARCHExtNames) { if ((Extensions & AE.ID) == AE.ID && !AE.Feature.empty()) Features.push_back(AE.Feature); else if (!AE.NegFeature.empty()) Features.push_back(AE.NegFeature); } return getHWDivFeatures(Extensions, Features); } StringRef ARM::getArchName(ARM::ArchKind AK) { return ARMArchNames[static_cast(AK)].Name; } StringRef ARM::getCPUAttr(ARM::ArchKind AK) { return ARMArchNames[static_cast(AK)].CPUAttr; } StringRef ARM::getSubArch(ARM::ArchKind AK) { return ARMArchNames[static_cast(AK)].getSubArch(); } unsigned ARM::getArchAttr(ARM::ArchKind AK) { return ARMArchNames[static_cast(AK)].ArchAttr; } StringRef ARM::getArchExtName(uint64_t ArchExtKind) { for (const auto &AE : ARCHExtNames) { if (ArchExtKind == AE.ID) return AE.Name; } return StringRef(); } static bool stripNegationPrefix(StringRef &Name) { return Name.consume_front("no"); } StringRef ARM::getArchExtFeature(StringRef ArchExt) { bool Negated = stripNegationPrefix(ArchExt); for (const auto &AE : ARCHExtNames) { if (!AE.Feature.empty() && ArchExt == AE.Name) return StringRef(Negated ? AE.NegFeature : AE.Feature); } return StringRef(); } static ARM::FPUKind findDoublePrecisionFPU(ARM::FPUKind InputFPUKind) { if (InputFPUKind == ARM::FK_INVALID || InputFPUKind == ARM::FK_NONE) return ARM::FK_INVALID; const ARM::FPUName &InputFPU = ARM::FPUNames[InputFPUKind]; // If the input FPU already supports double-precision, then there // isn't any different FPU we can return here. if (ARM::isDoublePrecision(InputFPU.Restriction)) return InputFPUKind; // Otherwise, look for an FPU entry with all the same fields, except // that it supports double precision. 
for (const ARM::FPUName &CandidateFPU : ARM::FPUNames) { if (CandidateFPU.FPUVer == InputFPU.FPUVer && CandidateFPU.NeonSupport == InputFPU.NeonSupport && ARM::has32Regs(CandidateFPU.Restriction) == ARM::has32Regs(InputFPU.Restriction) && ARM::isDoublePrecision(CandidateFPU.Restriction)) { return CandidateFPU.ID; } } // nothing found return ARM::FK_INVALID; } static ARM::FPUKind findSinglePrecisionFPU(ARM::FPUKind InputFPUKind) { if (InputFPUKind == ARM::FK_INVALID || InputFPUKind == ARM::FK_NONE) return ARM::FK_INVALID; const ARM::FPUName &InputFPU = ARM::FPUNames[InputFPUKind]; // If the input FPU already is single-precision only, then there // isn't any different FPU we can return here. if (!ARM::isDoublePrecision(InputFPU.Restriction)) return InputFPUKind; // Otherwise, look for an FPU entry with all the same fields, except // that it does not support double precision. for (const ARM::FPUName &CandidateFPU : ARM::FPUNames) { if (CandidateFPU.FPUVer == InputFPU.FPUVer && CandidateFPU.NeonSupport == InputFPU.NeonSupport && ARM::has32Regs(CandidateFPU.Restriction) == ARM::has32Regs(InputFPU.Restriction) && !ARM::isDoublePrecision(CandidateFPU.Restriction)) { return CandidateFPU.ID; } } // nothing found return ARM::FK_INVALID; } bool ARM::appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK, StringRef ArchExt, std::vector &Features, ARM::FPUKind &ArgFPUKind) { size_t StartingNumFeatures = Features.size(); const bool Negated = stripNegationPrefix(ArchExt); uint64_t ID = parseArchExt(ArchExt); if (ID == AEK_INVALID) return false; for (const auto &AE : ARCHExtNames) { if (Negated) { if ((AE.ID & ID) == ID && !AE.NegFeature.empty()) Features.push_back(AE.NegFeature); } else { if ((AE.ID & ID) == AE.ID && !AE.Feature.empty()) Features.push_back(AE.Feature); } } if (CPU == "") CPU = "generic"; if (ArchExt == "fp" || ArchExt == "fp.dp") { const ARM::FPUKind DefaultFPU = getDefaultFPU(CPU, AK); ARM::FPUKind FPUKind; if (ArchExt == "fp.dp") { const bool IsDP = ArgFPUKind != ARM::FK_INVALID && ArgFPUKind != ARM::FK_NONE && isDoublePrecision(getFPURestriction(ArgFPUKind)); if (Negated) { /* If there is no FPU selected yet, we still need to set ArgFPUKind, as * leaving it as FK_INVALID, would cause default FPU to be selected * later and that could be double precision one. */ if (ArgFPUKind != ARM::FK_INVALID && !IsDP) return true; FPUKind = findSinglePrecisionFPU(DefaultFPU); if (FPUKind == ARM::FK_INVALID) FPUKind = ARM::FK_NONE; } else { if (IsDP) return true; FPUKind = findDoublePrecisionFPU(DefaultFPU); if (FPUKind == ARM::FK_INVALID) return false; } } else if (Negated) { FPUKind = ARM::FK_NONE; } else { FPUKind = DefaultFPU; } ArgFPUKind = FPUKind; return true; } return StartingNumFeatures != Features.size(); } ARM::ArchKind ARM::convertV9toV8(ARM::ArchKind AK) { if (getProfileKind(AK) != ProfileKind::A) return ARM::ArchKind::INVALID; if (AK < ARM::ArchKind::ARMV9A || AK > ARM::ArchKind::ARMV9_3A) return ARM::ArchKind::INVALID; unsigned AK_v8 = static_cast(ARM::ArchKind::ARMV8_5A); AK_v8 += static_cast(AK) - static_cast(ARM::ArchKind::ARMV9A); return static_cast(AK_v8); } StringRef ARM::getDefaultCPU(StringRef Arch) { ArchKind AK = parseArch(Arch); if (AK == ArchKind::INVALID) return StringRef(); // Look for multiple AKs to find the default for pair AK+Name. 
for (const auto &CPU : CPUNames) { if (CPU.ArchID == AK && CPU.Default) return CPU.Name; } // If we can't find a default then target the architecture instead return "generic"; } uint64_t ARM::parseHWDiv(StringRef HWDiv) { StringRef Syn = getHWDivSynonym(HWDiv); for (const auto &D : HWDivNames) { if (Syn == D.Name) return D.ID; } return AEK_INVALID; } uint64_t ARM::parseArchExt(StringRef ArchExt) { for (const auto &A : ARCHExtNames) { if (ArchExt == A.Name) return A.ID; } return AEK_INVALID; } ARM::ArchKind ARM::parseCPUArch(StringRef CPU) { for (const auto &C : CPUNames) { if (CPU == C.Name) return C.ArchID; } return ArchKind::INVALID; } void ARM::fillValidCPUArchList(SmallVectorImpl &Values) { for (const auto &Arch : CPUNames) { if (Arch.ArchID != ArchKind::INVALID) Values.push_back(Arch.Name); } } StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) { StringRef ArchName = CPU.empty() ? TT.getArchName() : getArchName(parseCPUArch(CPU)); if (TT.isOSBinFormatMachO()) { if (TT.getEnvironment() == Triple::EABI || TT.getOS() == Triple::UnknownOS || parseArchProfile(ArchName) == ProfileKind::M) return "aapcs"; if (TT.isWatchABI()) return "aapcs16"; return "apcs-gnu"; } else if (TT.isOSWindows()) // FIXME: this is invalid for WindowsCE. return "aapcs"; // Select the default based on the platform. switch (TT.getEnvironment()) { case Triple::Android: case Triple::GNUEABI: + case Triple::GNUEABIT64: case Triple::GNUEABIHF: + case Triple::GNUEABIHFT64: case Triple::MuslEABI: case Triple::MuslEABIHF: case Triple::OpenHOS: return "aapcs-linux"; case Triple::EABIHF: case Triple::EABI: return "aapcs"; default: if (TT.isOSNetBSD()) return "apcs-gnu"; if (TT.isOSFreeBSD() || TT.isOSOpenBSD() || TT.isOSHaiku() || TT.isOHOSFamily()) return "aapcs-linux"; return "aapcs"; } } StringRef ARM::getARMCPUForArch(const llvm::Triple &Triple, StringRef MArch) { if (MArch.empty()) MArch = Triple.getArchName(); MArch = llvm::ARM::getCanonicalArchName(MArch); // Some defaults are forced. switch (Triple.getOS()) { case llvm::Triple::FreeBSD: case llvm::Triple::NetBSD: case llvm::Triple::OpenBSD: case llvm::Triple::Haiku: if (!MArch.empty() && MArch == "v6") return "arm1176jzf-s"; if (!MArch.empty() && MArch == "v7") return "cortex-a8"; break; case llvm::Triple::Win32: // FIXME: this is invalid for WindowsCE if (llvm::ARM::parseArchVersion(MArch) <= 7) return "cortex-a9"; break; case llvm::Triple::IOS: case llvm::Triple::MacOSX: case llvm::Triple::TvOS: case llvm::Triple::WatchOS: case llvm::Triple::DriverKit: case llvm::Triple::XROS: if (MArch == "v7k") return "cortex-a7"; break; default: break; } if (MArch.empty()) return StringRef(); StringRef CPU = llvm::ARM::getDefaultCPU(MArch); if (!CPU.empty() && CPU != "invalid") return CPU; // If no specific architecture version is requested, return the minimum CPU // required by the OS and environment. 
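// computeDefaultTargetABI() above routes the new GNUEABIT64/GNUEABIHFT64
// environments (64-bit time_t variants added by this patch) through the same
// branch as their non-t64 counterparts, and the getARMCPUForArch() fallback
// switch that follows does the same for the default CPU. A usage sketch; it
// assumes the patched Triple parser recognises the *t64 spellings, and the
// triples are illustrative:

#include "llvm/TargetParser/ARMTargetParser.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>

int main() {
  using namespace llvm;
  Triple HF("armv7-unknown-linux-gnueabihf");
  Triple HFT64("armv7-unknown-linux-gnueabihft64"); // new environment
  // Both map to the AAPCS Linux ABI per the switch above.
  assert(ARM::computeDefaultTargetABI(HF, /*CPU=*/"") == "aapcs-linux");
  assert(ARM::computeDefaultTargetABI(HFT64, /*CPU=*/"") == "aapcs-linux");
}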
switch (Triple.getOS()) { case llvm::Triple::Haiku: return "arm1176jzf-s"; case llvm::Triple::NetBSD: switch (Triple.getEnvironment()) { case llvm::Triple::EABI: case llvm::Triple::EABIHF: case llvm::Triple::GNUEABI: case llvm::Triple::GNUEABIHF: return "arm926ej-s"; default: return "strongarm"; } case llvm::Triple::NaCl: case llvm::Triple::OpenBSD: return "cortex-a8"; default: switch (Triple.getEnvironment()) { case llvm::Triple::EABIHF: case llvm::Triple::GNUEABIHF: + case llvm::Triple::GNUEABIHFT64: case llvm::Triple::MuslEABIHF: return "arm1176jzf-s"; default: return "arm7tdmi"; } } llvm_unreachable("invalid arch name"); } void ARM::PrintSupportedExtensions(StringMap DescMap) { outs() << "All available -march extensions for ARM\n\n" << " " << left_justify("Name", 20) << (DescMap.empty() ? "\n" : "Description\n"); for (const auto &Ext : ARCHExtNames) { // Extensions without a feature cannot be used with -march. if (!Ext.Feature.empty()) { std::string Description = DescMap[Ext.Name].str(); outs() << " " << format(Description.empty() ? "%s\n" : "%-20s%s\n", Ext.Name.str().c_str(), Description.c_str()); } } } diff --git a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp index bf89aace65e5..21d6c74b5956 100644 --- a/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp +++ b/contrib/llvm-project/llvm/lib/TargetParser/Triple.cpp @@ -1,2122 +1,2128 @@ //===--- Triple.cpp - Target triple helper class --------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "llvm/TargetParser/Triple.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/VersionTuple.h" #include "llvm/TargetParser/ARMTargetParser.h" #include "llvm/TargetParser/ARMTargetParserCommon.h" #include "llvm/TargetParser/Host.h" #include #include using namespace llvm; StringRef Triple::getArchTypeName(ArchType Kind) { switch (Kind) { case UnknownArch: return "unknown"; case aarch64: return "aarch64"; case aarch64_32: return "aarch64_32"; case aarch64_be: return "aarch64_be"; case amdgcn: return "amdgcn"; case amdil64: return "amdil64"; case amdil: return "amdil"; case arc: return "arc"; case arm: return "arm"; case armeb: return "armeb"; case avr: return "avr"; case bpfeb: return "bpfeb"; case bpfel: return "bpfel"; case csky: return "csky"; case dxil: return "dxil"; case hexagon: return "hexagon"; case hsail64: return "hsail64"; case hsail: return "hsail"; case kalimba: return "kalimba"; case lanai: return "lanai"; case le32: return "le32"; case le64: return "le64"; case loongarch32: return "loongarch32"; case loongarch64: return "loongarch64"; case m68k: return "m68k"; case mips64: return "mips64"; case mips64el: return "mips64el"; case mips: return "mips"; case mipsel: return "mipsel"; case msp430: return "msp430"; case nvptx64: return "nvptx64"; case nvptx: return "nvptx"; case ppc64: return "powerpc64"; case ppc64le: return "powerpc64le"; case ppc: return "powerpc"; case ppcle: return "powerpcle"; case r600: return "r600"; case renderscript32: return "renderscript32"; case renderscript64: return "renderscript64"; case 
riscv32: return "riscv32"; case riscv64: return "riscv64"; case shave: return "shave"; case sparc: return "sparc"; case sparcel: return "sparcel"; case sparcv9: return "sparcv9"; case spir64: return "spir64"; case spir: return "spir"; case spirv: return "spirv"; case spirv32: return "spirv32"; case spirv64: return "spirv64"; case systemz: return "s390x"; case tce: return "tce"; case tcele: return "tcele"; case thumb: return "thumb"; case thumbeb: return "thumbeb"; case ve: return "ve"; case wasm32: return "wasm32"; case wasm64: return "wasm64"; case x86: return "i386"; case x86_64: return "x86_64"; case xcore: return "xcore"; case xtensa: return "xtensa"; } llvm_unreachable("Invalid ArchType!"); } StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) { switch (Kind) { case Triple::mips: if (SubArch == MipsSubArch_r6) return "mipsisa32r6"; break; case Triple::mipsel: if (SubArch == MipsSubArch_r6) return "mipsisa32r6el"; break; case Triple::mips64: if (SubArch == MipsSubArch_r6) return "mipsisa64r6"; break; case Triple::mips64el: if (SubArch == MipsSubArch_r6) return "mipsisa64r6el"; break; case Triple::aarch64: if (SubArch == AArch64SubArch_arm64ec) return "arm64ec"; if (SubArch == AArch64SubArch_arm64e) return "arm64e"; break; case Triple::dxil: switch (SubArch) { case Triple::NoSubArch: case Triple::DXILSubArch_v1_0: return "dxilv1.0"; case Triple::DXILSubArch_v1_1: return "dxilv1.1"; case Triple::DXILSubArch_v1_2: return "dxilv1.2"; case Triple::DXILSubArch_v1_3: return "dxilv1.3"; case Triple::DXILSubArch_v1_4: return "dxilv1.4"; case Triple::DXILSubArch_v1_5: return "dxilv1.5"; case Triple::DXILSubArch_v1_6: return "dxilv1.6"; case Triple::DXILSubArch_v1_7: return "dxilv1.7"; case Triple::DXILSubArch_v1_8: return "dxilv1.8"; default: break; } break; default: break; } return getArchTypeName(Kind); } StringRef Triple::getArchTypePrefix(ArchType Kind) { switch (Kind) { default: return StringRef(); case aarch64: case aarch64_be: case aarch64_32: return "aarch64"; case arc: return "arc"; case arm: case armeb: case thumb: case thumbeb: return "arm"; case avr: return "avr"; case ppc64: case ppc64le: case ppc: case ppcle: return "ppc"; case m68k: return "m68k"; case mips: case mipsel: case mips64: case mips64el: return "mips"; case hexagon: return "hexagon"; case amdgcn: return "amdgcn"; case r600: return "r600"; case bpfel: case bpfeb: return "bpf"; case sparcv9: case sparcel: case sparc: return "sparc"; case systemz: return "s390"; case x86: case x86_64: return "x86"; case xcore: return "xcore"; // NVPTX intrinsics are namespaced under nvvm. 
case nvptx: return "nvvm"; case nvptx64: return "nvvm"; case le32: return "le32"; case le64: return "le64"; case amdil: case amdil64: return "amdil"; case hsail: case hsail64: return "hsail"; case spir: case spir64: return "spir"; case spirv: case spirv32: case spirv64: return "spv"; case kalimba: return "kalimba"; case lanai: return "lanai"; case shave: return "shave"; case wasm32: case wasm64: return "wasm"; case riscv32: case riscv64: return "riscv"; case ve: return "ve"; case csky: return "csky"; case loongarch32: case loongarch64: return "loongarch"; case dxil: return "dx"; case xtensa: return "xtensa"; } } StringRef Triple::getVendorTypeName(VendorType Kind) { switch (Kind) { case UnknownVendor: return "unknown"; case AMD: return "amd"; case Apple: return "apple"; case CSR: return "csr"; case Freescale: return "fsl"; case IBM: return "ibm"; case ImaginationTechnologies: return "img"; case Mesa: return "mesa"; case MipsTechnologies: return "mti"; case NVIDIA: return "nvidia"; case OpenEmbedded: return "oe"; case PC: return "pc"; case SCEI: return "scei"; case SUSE: return "suse"; } llvm_unreachable("Invalid VendorType!"); } StringRef Triple::getOSTypeName(OSType Kind) { switch (Kind) { case UnknownOS: return "unknown"; case AIX: return "aix"; case AMDHSA: return "amdhsa"; case AMDPAL: return "amdpal"; case BridgeOS: return "bridgeos"; case CUDA: return "cuda"; case Darwin: return "darwin"; case DragonFly: return "dragonfly"; case DriverKit: return "driverkit"; case ELFIAMCU: return "elfiamcu"; case Emscripten: return "emscripten"; case FreeBSD: return "freebsd"; case Fuchsia: return "fuchsia"; case Haiku: return "haiku"; case HermitCore: return "hermit"; case Hurd: return "hurd"; case IOS: return "ios"; case KFreeBSD: return "kfreebsd"; case Linux: return "linux"; case Lv2: return "lv2"; case MacOSX: return "macosx"; case Mesa3D: return "mesa3d"; case NVCL: return "nvcl"; case NaCl: return "nacl"; case NetBSD: return "netbsd"; case OpenBSD: return "openbsd"; case PS4: return "ps4"; case PS5: return "ps5"; case RTEMS: return "rtems"; case Solaris: return "solaris"; case Serenity: return "serenity"; case TvOS: return "tvos"; case UEFI: return "uefi"; case WASI: return "wasi"; case WatchOS: return "watchos"; case Win32: return "windows"; case ZOS: return "zos"; case ShaderModel: return "shadermodel"; case LiteOS: return "liteos"; case XROS: return "xros"; case Vulkan: return "vulkan"; } llvm_unreachable("Invalid OSType"); } StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) { switch (Kind) { case UnknownEnvironment: return "unknown"; case Android: return "android"; case CODE16: return "code16"; case CoreCLR: return "coreclr"; case Cygnus: return "cygnus"; case EABI: return "eabi"; case EABIHF: return "eabihf"; case GNU: return "gnu"; + case GNUT64: return "gnut64"; case GNUABI64: return "gnuabi64"; case GNUABIN32: return "gnuabin32"; case GNUEABI: return "gnueabi"; + case GNUEABIT64: return "gnueabit64"; case GNUEABIHF: return "gnueabihf"; + case GNUEABIHFT64: return "gnueabihft64"; case GNUF32: return "gnuf32"; case GNUF64: return "gnuf64"; case GNUSF: return "gnusf"; case GNUX32: return "gnux32"; case GNUILP32: return "gnu_ilp32"; case Itanium: return "itanium"; case MSVC: return "msvc"; case MacABI: return "macabi"; case Musl: return "musl"; case MuslEABI: return "musleabi"; case MuslEABIHF: return "musleabihf"; case MuslX32: return "muslx32"; case Simulator: return "simulator"; case Pixel: return "pixel"; case Vertex: return "vertex"; case Geometry: return "geometry"; 
case Hull: return "hull"; case Domain: return "domain"; case Compute: return "compute"; case Library: return "library"; case RayGeneration: return "raygeneration"; case Intersection: return "intersection"; case AnyHit: return "anyhit"; case ClosestHit: return "closesthit"; case Miss: return "miss"; case Callable: return "callable"; case Mesh: return "mesh"; case Amplification: return "amplification"; case OpenCL: return "opencl"; case OpenHOS: return "ohos"; case PAuthTest: return "pauthtest"; } llvm_unreachable("Invalid EnvironmentType!"); } StringRef Triple::getObjectFormatTypeName(ObjectFormatType Kind) { switch (Kind) { case UnknownObjectFormat: return ""; case COFF: return "coff"; case ELF: return "elf"; case GOFF: return "goff"; case MachO: return "macho"; case Wasm: return "wasm"; case XCOFF: return "xcoff"; case DXContainer: return "dxcontainer"; case SPIRV: return "spirv"; } llvm_unreachable("unknown object format type"); } static Triple::ArchType parseBPFArch(StringRef ArchName) { if (ArchName == "bpf") { if (sys::IsLittleEndianHost) return Triple::bpfel; else return Triple::bpfeb; } else if (ArchName == "bpf_be" || ArchName == "bpfeb") { return Triple::bpfeb; } else if (ArchName == "bpf_le" || ArchName == "bpfel") { return Triple::bpfel; } else { return Triple::UnknownArch; } } Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { Triple::ArchType BPFArch(parseBPFArch(Name)); return StringSwitch(Name) .Case("aarch64", aarch64) .Case("aarch64_be", aarch64_be) .Case("aarch64_32", aarch64_32) .Case("arc", arc) .Case("arm64", aarch64) // "arm64" is an alias for "aarch64" .Case("arm64_32", aarch64_32) .Case("arm", arm) .Case("armeb", armeb) .Case("avr", avr) .StartsWith("bpf", BPFArch) .Case("m68k", m68k) .Case("mips", mips) .Case("mipsel", mipsel) .Case("mips64", mips64) .Case("mips64el", mips64el) .Case("msp430", msp430) .Case("ppc64", ppc64) .Case("ppc32", ppc) .Case("ppc", ppc) .Case("ppc32le", ppcle) .Case("ppcle", ppcle) .Case("ppc64le", ppc64le) .Case("r600", r600) .Case("amdgcn", amdgcn) .Case("riscv32", riscv32) .Case("riscv64", riscv64) .Case("hexagon", hexagon) .Case("sparc", sparc) .Case("sparcel", sparcel) .Case("sparcv9", sparcv9) .Case("s390x", systemz) .Case("systemz", systemz) .Case("tce", tce) .Case("tcele", tcele) .Case("thumb", thumb) .Case("thumbeb", thumbeb) .Case("x86", x86) .Case("i386", x86) .Case("x86-64", x86_64) .Case("xcore", xcore) .Case("nvptx", nvptx) .Case("nvptx64", nvptx64) .Case("le32", le32) .Case("le64", le64) .Case("amdil", amdil) .Case("amdil64", amdil64) .Case("hsail", hsail) .Case("hsail64", hsail64) .Case("spir", spir) .Case("spir64", spir64) .Case("spirv", spirv) .Case("spirv32", spirv32) .Case("spirv64", spirv64) .Case("kalimba", kalimba) .Case("lanai", lanai) .Case("shave", shave) .Case("wasm32", wasm32) .Case("wasm64", wasm64) .Case("renderscript32", renderscript32) .Case("renderscript64", renderscript64) .Case("ve", ve) .Case("csky", csky) .Case("loongarch32", loongarch32) .Case("loongarch64", loongarch64) .Case("dxil", dxil) .Case("xtensa", xtensa) .Default(UnknownArch); } static Triple::ArchType parseARMArch(StringRef ArchName) { ARM::ISAKind ISA = ARM::parseArchISA(ArchName); ARM::EndianKind ENDIAN = ARM::parseArchEndian(ArchName); Triple::ArchType arch = Triple::UnknownArch; switch (ENDIAN) { case ARM::EndianKind::LITTLE: { switch (ISA) { case ARM::ISAKind::ARM: arch = Triple::arm; break; case ARM::ISAKind::THUMB: arch = Triple::thumb; break; case ARM::ISAKind::AARCH64: arch = Triple::aarch64; break; case 
ARM::ISAKind::INVALID: break; } break; } case ARM::EndianKind::BIG: { switch (ISA) { case ARM::ISAKind::ARM: arch = Triple::armeb; break; case ARM::ISAKind::THUMB: arch = Triple::thumbeb; break; case ARM::ISAKind::AARCH64: arch = Triple::aarch64_be; break; case ARM::ISAKind::INVALID: break; } break; } case ARM::EndianKind::INVALID: { break; } } ArchName = ARM::getCanonicalArchName(ArchName); if (ArchName.empty()) return Triple::UnknownArch; // Thumb only exists in v4+ if (ISA == ARM::ISAKind::THUMB && (ArchName.starts_with("v2") || ArchName.starts_with("v3"))) return Triple::UnknownArch; // Thumb only for v6m ARM::ProfileKind Profile = ARM::parseArchProfile(ArchName); unsigned Version = ARM::parseArchVersion(ArchName); if (Profile == ARM::ProfileKind::M && Version == 6) { if (ENDIAN == ARM::EndianKind::BIG) return Triple::thumbeb; else return Triple::thumb; } return arch; } static Triple::ArchType parseArch(StringRef ArchName) { auto AT = StringSwitch(ArchName) .Cases("i386", "i486", "i586", "i686", Triple::x86) // FIXME: Do we need to support these? .Cases("i786", "i886", "i986", Triple::x86) .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64) .Cases("powerpc", "powerpcspe", "ppc", "ppc32", Triple::ppc) .Cases("powerpcle", "ppcle", "ppc32le", Triple::ppcle) .Cases("powerpc64", "ppu", "ppc64", Triple::ppc64) .Cases("powerpc64le", "ppc64le", Triple::ppc64le) .Case("xscale", Triple::arm) .Case("xscaleeb", Triple::armeb) .Case("aarch64", Triple::aarch64) .Case("aarch64_be", Triple::aarch64_be) .Case("aarch64_32", Triple::aarch64_32) .Case("arc", Triple::arc) .Case("arm64", Triple::aarch64) .Case("arm64_32", Triple::aarch64_32) .Case("arm64e", Triple::aarch64) .Case("arm64ec", Triple::aarch64) .Case("arm", Triple::arm) .Case("armeb", Triple::armeb) .Case("thumb", Triple::thumb) .Case("thumbeb", Triple::thumbeb) .Case("avr", Triple::avr) .Case("m68k", Triple::m68k) .Case("msp430", Triple::msp430) .Cases("mips", "mipseb", "mipsallegrex", "mipsisa32r6", "mipsr6", Triple::mips) .Cases("mipsel", "mipsallegrexel", "mipsisa32r6el", "mipsr6el", Triple::mipsel) .Cases("mips64", "mips64eb", "mipsn32", "mipsisa64r6", "mips64r6", "mipsn32r6", Triple::mips64) .Cases("mips64el", "mipsn32el", "mipsisa64r6el", "mips64r6el", "mipsn32r6el", Triple::mips64el) .Case("r600", Triple::r600) .Case("amdgcn", Triple::amdgcn) .Case("riscv32", Triple::riscv32) .Case("riscv64", Triple::riscv64) .Case("hexagon", Triple::hexagon) .Cases("s390x", "systemz", Triple::systemz) .Case("sparc", Triple::sparc) .Case("sparcel", Triple::sparcel) .Cases("sparcv9", "sparc64", Triple::sparcv9) .Case("tce", Triple::tce) .Case("tcele", Triple::tcele) .Case("xcore", Triple::xcore) .Case("nvptx", Triple::nvptx) .Case("nvptx64", Triple::nvptx64) .Case("le32", Triple::le32) .Case("le64", Triple::le64) .Case("amdil", Triple::amdil) .Case("amdil64", Triple::amdil64) .Case("hsail", Triple::hsail) .Case("hsail64", Triple::hsail64) .Case("spir", Triple::spir) .Case("spir64", Triple::spir64) .Cases("spirv", "spirv1.5", "spirv1.6", Triple::spirv) .Cases("spirv32", "spirv32v1.0", "spirv32v1.1", "spirv32v1.2", "spirv32v1.3", "spirv32v1.4", "spirv32v1.5", "spirv32v1.6", Triple::spirv32) .Cases("spirv64", "spirv64v1.0", "spirv64v1.1", "spirv64v1.2", "spirv64v1.3", "spirv64v1.4", "spirv64v1.5", "spirv64v1.6", Triple::spirv64) .StartsWith("kalimba", Triple::kalimba) .Case("lanai", Triple::lanai) .Case("renderscript32", Triple::renderscript32) .Case("renderscript64", Triple::renderscript64) .Case("shave", Triple::shave) .Case("ve", Triple::ve) 
.Case("wasm32", Triple::wasm32) .Case("wasm64", Triple::wasm64) .Case("csky", Triple::csky) .Case("loongarch32", Triple::loongarch32) .Case("loongarch64", Triple::loongarch64) .Cases("dxil", "dxilv1.0", "dxilv1.1", "dxilv1.2", "dxilv1.3", "dxilv1.4", "dxilv1.5", "dxilv1.6", "dxilv1.7", "dxilv1.8", Triple::dxil) .Case("xtensa", Triple::xtensa) .Default(Triple::UnknownArch); // Some architectures require special parsing logic just to compute the // ArchType result. if (AT == Triple::UnknownArch) { if (ArchName.starts_with("arm") || ArchName.starts_with("thumb") || ArchName.starts_with("aarch64")) return parseARMArch(ArchName); if (ArchName.starts_with("bpf")) return parseBPFArch(ArchName); } return AT; } static Triple::VendorType parseVendor(StringRef VendorName) { return StringSwitch(VendorName) .Case("apple", Triple::Apple) .Case("pc", Triple::PC) .Case("scei", Triple::SCEI) .Case("sie", Triple::SCEI) .Case("fsl", Triple::Freescale) .Case("ibm", Triple::IBM) .Case("img", Triple::ImaginationTechnologies) .Case("mti", Triple::MipsTechnologies) .Case("nvidia", Triple::NVIDIA) .Case("csr", Triple::CSR) .Case("amd", Triple::AMD) .Case("mesa", Triple::Mesa) .Case("suse", Triple::SUSE) .Case("oe", Triple::OpenEmbedded) .Default(Triple::UnknownVendor); } static Triple::OSType parseOS(StringRef OSName) { return StringSwitch(OSName) .StartsWith("darwin", Triple::Darwin) .StartsWith("dragonfly", Triple::DragonFly) .StartsWith("freebsd", Triple::FreeBSD) .StartsWith("fuchsia", Triple::Fuchsia) .StartsWith("ios", Triple::IOS) .StartsWith("kfreebsd", Triple::KFreeBSD) .StartsWith("linux", Triple::Linux) .StartsWith("lv2", Triple::Lv2) .StartsWith("macos", Triple::MacOSX) .StartsWith("netbsd", Triple::NetBSD) .StartsWith("openbsd", Triple::OpenBSD) .StartsWith("solaris", Triple::Solaris) .StartsWith("uefi", Triple::UEFI) .StartsWith("win32", Triple::Win32) .StartsWith("windows", Triple::Win32) .StartsWith("zos", Triple::ZOS) .StartsWith("haiku", Triple::Haiku) .StartsWith("rtems", Triple::RTEMS) .StartsWith("nacl", Triple::NaCl) .StartsWith("aix", Triple::AIX) .StartsWith("cuda", Triple::CUDA) .StartsWith("nvcl", Triple::NVCL) .StartsWith("amdhsa", Triple::AMDHSA) .StartsWith("ps4", Triple::PS4) .StartsWith("ps5", Triple::PS5) .StartsWith("elfiamcu", Triple::ELFIAMCU) .StartsWith("tvos", Triple::TvOS) .StartsWith("watchos", Triple::WatchOS) .StartsWith("bridgeos", Triple::BridgeOS) .StartsWith("driverkit", Triple::DriverKit) .StartsWith("xros", Triple::XROS) .StartsWith("visionos", Triple::XROS) .StartsWith("mesa3d", Triple::Mesa3D) .StartsWith("amdpal", Triple::AMDPAL) .StartsWith("hermit", Triple::HermitCore) .StartsWith("hurd", Triple::Hurd) .StartsWith("wasi", Triple::WASI) .StartsWith("emscripten", Triple::Emscripten) .StartsWith("shadermodel", Triple::ShaderModel) .StartsWith("liteos", Triple::LiteOS) .StartsWith("serenity", Triple::Serenity) .StartsWith("vulkan", Triple::Vulkan) .Default(Triple::UnknownOS); } static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) { return StringSwitch(EnvironmentName) .StartsWith("eabihf", Triple::EABIHF) .StartsWith("eabi", Triple::EABI) .StartsWith("gnuabin32", Triple::GNUABIN32) .StartsWith("gnuabi64", Triple::GNUABI64) + .StartsWith("gnueabihft64", Triple::GNUEABIHFT64) .StartsWith("gnueabihf", Triple::GNUEABIHF) + .StartsWith("gnueabit64", Triple::GNUEABIT64) .StartsWith("gnueabi", Triple::GNUEABI) .StartsWith("gnuf32", Triple::GNUF32) .StartsWith("gnuf64", Triple::GNUF64) .StartsWith("gnusf", Triple::GNUSF) .StartsWith("gnux32", 
Triple::GNUX32) .StartsWith("gnu_ilp32", Triple::GNUILP32) .StartsWith("code16", Triple::CODE16) + .StartsWith("gnut64", Triple::GNUT64) .StartsWith("gnu", Triple::GNU) .StartsWith("android", Triple::Android) .StartsWith("musleabihf", Triple::MuslEABIHF) .StartsWith("musleabi", Triple::MuslEABI) .StartsWith("muslx32", Triple::MuslX32) .StartsWith("musl", Triple::Musl) .StartsWith("msvc", Triple::MSVC) .StartsWith("itanium", Triple::Itanium) .StartsWith("cygnus", Triple::Cygnus) .StartsWith("coreclr", Triple::CoreCLR) .StartsWith("simulator", Triple::Simulator) .StartsWith("macabi", Triple::MacABI) .StartsWith("pixel", Triple::Pixel) .StartsWith("vertex", Triple::Vertex) .StartsWith("geometry", Triple::Geometry) .StartsWith("hull", Triple::Hull) .StartsWith("domain", Triple::Domain) .StartsWith("compute", Triple::Compute) .StartsWith("library", Triple::Library) .StartsWith("raygeneration", Triple::RayGeneration) .StartsWith("intersection", Triple::Intersection) .StartsWith("anyhit", Triple::AnyHit) .StartsWith("closesthit", Triple::ClosestHit) .StartsWith("miss", Triple::Miss) .StartsWith("callable", Triple::Callable) .StartsWith("mesh", Triple::Mesh) .StartsWith("amplification", Triple::Amplification) .StartsWith("opencl", Triple::OpenCL) .StartsWith("ohos", Triple::OpenHOS) .StartsWith("pauthtest", Triple::PAuthTest) .Default(Triple::UnknownEnvironment); } static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) { return StringSwitch(EnvironmentName) // "xcoff" must come before "coff" because of the order-dependendent // pattern matching. .EndsWith("xcoff", Triple::XCOFF) .EndsWith("coff", Triple::COFF) .EndsWith("elf", Triple::ELF) .EndsWith("goff", Triple::GOFF) .EndsWith("macho", Triple::MachO) .EndsWith("wasm", Triple::Wasm) .EndsWith("spirv", Triple::SPIRV) .Default(Triple::UnknownObjectFormat); } static Triple::SubArchType parseSubArch(StringRef SubArchName) { if (SubArchName.starts_with("mips") && (SubArchName.ends_with("r6el") || SubArchName.ends_with("r6"))) return Triple::MipsSubArch_r6; if (SubArchName == "powerpcspe") return Triple::PPCSubArch_spe; if (SubArchName == "arm64e") return Triple::AArch64SubArch_arm64e; if (SubArchName == "arm64ec") return Triple::AArch64SubArch_arm64ec; if (SubArchName.starts_with("spirv")) return StringSwitch(SubArchName) .EndsWith("v1.0", Triple::SPIRVSubArch_v10) .EndsWith("v1.1", Triple::SPIRVSubArch_v11) .EndsWith("v1.2", Triple::SPIRVSubArch_v12) .EndsWith("v1.3", Triple::SPIRVSubArch_v13) .EndsWith("v1.4", Triple::SPIRVSubArch_v14) .EndsWith("v1.5", Triple::SPIRVSubArch_v15) .EndsWith("v1.6", Triple::SPIRVSubArch_v16) .Default(Triple::NoSubArch); if (SubArchName.starts_with("dxil")) return StringSwitch(SubArchName) .EndsWith("v1.0", Triple::DXILSubArch_v1_0) .EndsWith("v1.1", Triple::DXILSubArch_v1_1) .EndsWith("v1.2", Triple::DXILSubArch_v1_2) .EndsWith("v1.3", Triple::DXILSubArch_v1_3) .EndsWith("v1.4", Triple::DXILSubArch_v1_4) .EndsWith("v1.5", Triple::DXILSubArch_v1_5) .EndsWith("v1.6", Triple::DXILSubArch_v1_6) .EndsWith("v1.7", Triple::DXILSubArch_v1_7) .EndsWith("v1.8", Triple::DXILSubArch_v1_8) .Default(Triple::NoSubArch); StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName); // For now, this is the small part. Early return. if (ARMSubArch.empty()) return StringSwitch(SubArchName) .EndsWith("kalimba3", Triple::KalimbaSubArch_v3) .EndsWith("kalimba4", Triple::KalimbaSubArch_v4) .EndsWith("kalimba5", Triple::KalimbaSubArch_v5) .Default(Triple::NoSubArch); // ARM sub arch. 
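// A note on the parseEnvironment() entries added above: StringSwitch takes
// the first StartsWith() that matches, so the longer "gnut64", "gnueabit64"
// and "gnueabihft64" prefixes must appear before "gnu", "gnueabi" and
// "gnueabihf", otherwise the t64 suffix would be misparsed as the plain GNU
// environment. A minimal sketch of the expected round trip, assuming the new
// EnvironmentType enumerators added elsewhere in this change; the triple
// strings below are illustrative.
#include <cassert>
#include "llvm/TargetParser/Triple.h"

static void checkT64EnvironmentParsing() {
  llvm::Triple T("armv7-unknown-linux-gnueabihft64");
  assert(T.getEnvironment() == llvm::Triple::GNUEABIHFT64);
  assert(T.getEnvironmentName() == "gnueabihft64");
  llvm::Triple U("arm-unknown-linux-gnut64");
  assert(U.getEnvironment() == llvm::Triple::GNUT64);
  (void)T;
  (void)U;
}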
switch(ARM::parseArch(ARMSubArch)) { case ARM::ArchKind::ARMV4: return Triple::NoSubArch; case ARM::ArchKind::ARMV4T: return Triple::ARMSubArch_v4t; case ARM::ArchKind::ARMV5T: return Triple::ARMSubArch_v5; case ARM::ArchKind::ARMV5TE: case ARM::ArchKind::IWMMXT: case ARM::ArchKind::IWMMXT2: case ARM::ArchKind::XSCALE: case ARM::ArchKind::ARMV5TEJ: return Triple::ARMSubArch_v5te; case ARM::ArchKind::ARMV6: return Triple::ARMSubArch_v6; case ARM::ArchKind::ARMV6K: case ARM::ArchKind::ARMV6KZ: return Triple::ARMSubArch_v6k; case ARM::ArchKind::ARMV6T2: return Triple::ARMSubArch_v6t2; case ARM::ArchKind::ARMV6M: return Triple::ARMSubArch_v6m; case ARM::ArchKind::ARMV7A: case ARM::ArchKind::ARMV7R: return Triple::ARMSubArch_v7; case ARM::ArchKind::ARMV7VE: return Triple::ARMSubArch_v7ve; case ARM::ArchKind::ARMV7K: return Triple::ARMSubArch_v7k; case ARM::ArchKind::ARMV7M: return Triple::ARMSubArch_v7m; case ARM::ArchKind::ARMV7S: return Triple::ARMSubArch_v7s; case ARM::ArchKind::ARMV7EM: return Triple::ARMSubArch_v7em; case ARM::ArchKind::ARMV8A: return Triple::ARMSubArch_v8; case ARM::ArchKind::ARMV8_1A: return Triple::ARMSubArch_v8_1a; case ARM::ArchKind::ARMV8_2A: return Triple::ARMSubArch_v8_2a; case ARM::ArchKind::ARMV8_3A: return Triple::ARMSubArch_v8_3a; case ARM::ArchKind::ARMV8_4A: return Triple::ARMSubArch_v8_4a; case ARM::ArchKind::ARMV8_5A: return Triple::ARMSubArch_v8_5a; case ARM::ArchKind::ARMV8_6A: return Triple::ARMSubArch_v8_6a; case ARM::ArchKind::ARMV8_7A: return Triple::ARMSubArch_v8_7a; case ARM::ArchKind::ARMV8_8A: return Triple::ARMSubArch_v8_8a; case ARM::ArchKind::ARMV8_9A: return Triple::ARMSubArch_v8_9a; case ARM::ArchKind::ARMV9A: return Triple::ARMSubArch_v9; case ARM::ArchKind::ARMV9_1A: return Triple::ARMSubArch_v9_1a; case ARM::ArchKind::ARMV9_2A: return Triple::ARMSubArch_v9_2a; case ARM::ArchKind::ARMV9_3A: return Triple::ARMSubArch_v9_3a; case ARM::ArchKind::ARMV9_4A: return Triple::ARMSubArch_v9_4a; case ARM::ArchKind::ARMV9_5A: return Triple::ARMSubArch_v9_5a; case ARM::ArchKind::ARMV8R: return Triple::ARMSubArch_v8r; case ARM::ArchKind::ARMV8MBaseline: return Triple::ARMSubArch_v8m_baseline; case ARM::ArchKind::ARMV8MMainline: return Triple::ARMSubArch_v8m_mainline; case ARM::ArchKind::ARMV8_1MMainline: return Triple::ARMSubArch_v8_1m_mainline; default: return Triple::NoSubArch; } } static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { switch (T.getArch()) { case Triple::UnknownArch: case Triple::aarch64: case Triple::aarch64_32: case Triple::arm: case Triple::thumb: case Triple::x86: case Triple::x86_64: switch (T.getOS()) { case Triple::Win32: case Triple::UEFI: return Triple::COFF; default: return T.isOSDarwin() ? 
Triple::MachO : Triple::ELF; } case Triple::aarch64_be: case Triple::amdgcn: case Triple::amdil64: case Triple::amdil: case Triple::arc: case Triple::armeb: case Triple::avr: case Triple::bpfeb: case Triple::bpfel: case Triple::csky: case Triple::hexagon: case Triple::hsail64: case Triple::hsail: case Triple::kalimba: case Triple::lanai: case Triple::le32: case Triple::le64: case Triple::loongarch32: case Triple::loongarch64: case Triple::m68k: case Triple::mips64: case Triple::mips64el: case Triple::mips: case Triple::mipsel: case Triple::msp430: case Triple::nvptx64: case Triple::nvptx: case Triple::ppc64le: case Triple::ppcle: case Triple::r600: case Triple::renderscript32: case Triple::renderscript64: case Triple::riscv32: case Triple::riscv64: case Triple::shave: case Triple::sparc: case Triple::sparcel: case Triple::sparcv9: case Triple::spir64: case Triple::spir: case Triple::tce: case Triple::tcele: case Triple::thumbeb: case Triple::ve: case Triple::xcore: case Triple::xtensa: return Triple::ELF; case Triple::ppc64: case Triple::ppc: if (T.isOSAIX()) return Triple::XCOFF; if (T.isOSDarwin()) return Triple::MachO; return Triple::ELF; case Triple::systemz: if (T.isOSzOS()) return Triple::GOFF; return Triple::ELF; case Triple::wasm32: case Triple::wasm64: return Triple::Wasm; case Triple::spirv: case Triple::spirv32: case Triple::spirv64: return Triple::SPIRV; case Triple::dxil: return Triple::DXContainer; } llvm_unreachable("unknown architecture"); } /// Construct a triple from the string representation provided. /// /// This stores the string representation and parses the various pieces into /// enum members. Triple::Triple(const Twine &Str) : Data(Str.str()), Arch(UnknownArch), SubArch(NoSubArch), Vendor(UnknownVendor), OS(UnknownOS), Environment(UnknownEnvironment), ObjectFormat(UnknownObjectFormat) { // Do minimal parsing by hand here. SmallVector Components; StringRef(Data).split(Components, '-', /*MaxSplit*/ 3); if (Components.size() > 0) { Arch = parseArch(Components[0]); SubArch = parseSubArch(Components[0]); if (Components.size() > 1) { Vendor = parseVendor(Components[1]); if (Components.size() > 2) { OS = parseOS(Components[2]); if (Components.size() > 3) { Environment = parseEnvironment(Components[3]); ObjectFormat = parseFormat(Components[3]); } } } else { Environment = StringSwitch(Components[0]) .StartsWith("mipsn32", Triple::GNUABIN32) .StartsWith("mips64", Triple::GNUABI64) .StartsWith("mipsisa64", Triple::GNUABI64) .StartsWith("mipsisa32", Triple::GNU) .Cases("mips", "mipsel", "mipsr6", "mipsr6el", Triple::GNU) .Default(UnknownEnvironment); } } if (ObjectFormat == UnknownObjectFormat) ObjectFormat = getDefaultFormat(*this); } /// Construct a triple from string representations of the architecture, /// vendor, and OS. /// /// This joins each argument into a canonical string representation and parses /// them into enum members. It leaves the environment unknown and omits it from /// the string representation. Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr) : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr).str()), Arch(parseArch(ArchStr.str())), SubArch(parseSubArch(ArchStr.str())), Vendor(parseVendor(VendorStr.str())), OS(parseOS(OSStr.str())), Environment(), ObjectFormat(Triple::UnknownObjectFormat) { ObjectFormat = getDefaultFormat(*this); } /// Construct a triple from string representations of the architecture, /// vendor, OS, and environment. 
/// /// This joins each argument into a canonical string representation and parses /// them into enum members. Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr, const Twine &EnvironmentStr) : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr + Twine('-') + EnvironmentStr).str()), Arch(parseArch(ArchStr.str())), SubArch(parseSubArch(ArchStr.str())), Vendor(parseVendor(VendorStr.str())), OS(parseOS(OSStr.str())), Environment(parseEnvironment(EnvironmentStr.str())), ObjectFormat(parseFormat(EnvironmentStr.str())) { if (ObjectFormat == Triple::UnknownObjectFormat) ObjectFormat = getDefaultFormat(*this); } static VersionTuple parseVersionFromName(StringRef Name); static StringRef getDXILArchNameFromShaderModel(StringRef ShaderModelStr) { VersionTuple Ver = parseVersionFromName(ShaderModelStr.drop_front(strlen("shadermodel"))); // Default DXIL minor version when Shader Model version is anything other // than 6.[0...8] or 6.x (which translates to latest current SM version) const unsigned SMMajor = 6; if (!Ver.empty()) { if (Ver.getMajor() == SMMajor) { if (std::optional SMMinor = Ver.getMinor()) { switch (*SMMinor) { case 0: return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_0); case 1: return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_1); case 2: return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_2); case 3: return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_3); case 4: return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_4); case 5: return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_5); case 6: return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_6); case 7: return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_7); case 8: return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_8); default: report_fatal_error("Unsupported Shader Model version", false); } } } } else { // Special case: DXIL minor version is set to LatestCurrentDXILMinor for // shadermodel6.x is if (ShaderModelStr == "shadermodel6.x") { return Triple::getArchName(Triple::dxil, Triple::LatestDXILSubArch); } } // DXIL version corresponding to Shader Model version other than 6.Minor // is 1.0 return Triple::getArchName(Triple::dxil, Triple::DXILSubArch_v1_0); } std::string Triple::normalize(StringRef Str) { bool IsMinGW32 = false; bool IsCygwin = false; // Parse into components. SmallVector Components; Str.split(Components, '-'); // If the first component corresponds to a known architecture, preferentially // use it for the architecture. If the second component corresponds to a // known vendor, preferentially use it for the vendor, etc. This avoids silly // component movement when a component parses as (eg) both a valid arch and a // valid os. ArchType Arch = UnknownArch; if (Components.size() > 0) Arch = parseArch(Components[0]); VendorType Vendor = UnknownVendor; if (Components.size() > 1) Vendor = parseVendor(Components[1]); OSType OS = UnknownOS; if (Components.size() > 2) { OS = parseOS(Components[2]); IsCygwin = Components[2].starts_with("cygwin"); IsMinGW32 = Components[2].starts_with("mingw"); } EnvironmentType Environment = UnknownEnvironment; if (Components.size() > 3) Environment = parseEnvironment(Components[3]); ObjectFormatType ObjectFormat = UnknownObjectFormat; if (Components.size() > 4) ObjectFormat = parseFormat(Components[4]); // Note which components are already in their final position. These will not // be moved. 
bool Found[4]; Found[0] = Arch != UnknownArch; Found[1] = Vendor != UnknownVendor; Found[2] = OS != UnknownOS; Found[3] = Environment != UnknownEnvironment; // If they are not there already, permute the components into their canonical // positions by seeing if they parse as a valid architecture, and if so moving // the component to the architecture position etc. for (unsigned Pos = 0; Pos != std::size(Found); ++Pos) { if (Found[Pos]) continue; // Already in the canonical position. for (unsigned Idx = 0; Idx != Components.size(); ++Idx) { // Do not reparse any components that already matched. if (Idx < std::size(Found) && Found[Idx]) continue; // Does this component parse as valid for the target position? bool Valid = false; StringRef Comp = Components[Idx]; switch (Pos) { default: llvm_unreachable("unexpected component type!"); case 0: Arch = parseArch(Comp); Valid = Arch != UnknownArch; break; case 1: Vendor = parseVendor(Comp); Valid = Vendor != UnknownVendor; break; case 2: OS = parseOS(Comp); IsCygwin = Comp.starts_with("cygwin"); IsMinGW32 = Comp.starts_with("mingw"); Valid = OS != UnknownOS || IsCygwin || IsMinGW32; break; case 3: Environment = parseEnvironment(Comp); Valid = Environment != UnknownEnvironment; if (!Valid) { ObjectFormat = parseFormat(Comp); Valid = ObjectFormat != UnknownObjectFormat; } break; } if (!Valid) continue; // Nope, try the next component. // Move the component to the target position, pushing any non-fixed // components that are in the way to the right. This tends to give // good results in the common cases of a forgotten vendor component // or a wrongly positioned environment. if (Pos < Idx) { // Insert left, pushing the existing components to the right. For // example, a-b-i386 -> i386-a-b when moving i386 to the front. StringRef CurrentComponent(""); // The empty component. // Replace the component we are moving with an empty component. std::swap(CurrentComponent, Components[Idx]); // Insert the component being moved at Pos, displacing any existing // components to the right. for (unsigned i = Pos; !CurrentComponent.empty(); ++i) { // Skip over any fixed components. while (i < std::size(Found) && Found[i]) ++i; // Place the component at the new position, getting the component // that was at this position - it will be moved right. std::swap(CurrentComponent, Components[i]); } } else if (Pos > Idx) { // Push right by inserting empty components until the component at Idx // reaches the target position Pos. For example, pc-a -> -pc-a when // moving pc to the second position. do { // Insert one empty component at Idx. StringRef CurrentComponent(""); // The empty component. for (unsigned i = Idx; i < Components.size();) { // Place the component at the new position, getting the component // that was at this position - it will be moved right. std::swap(CurrentComponent, Components[i]); // If it was placed on top of an empty component then we are done. if (CurrentComponent.empty()) break; // Advance to the next component, skipping any fixed components. while (++i < std::size(Found) && Found[i]) ; } // The last component was pushed off the end - append it. if (!CurrentComponent.empty()) Components.push_back(CurrentComponent); // Advance Idx to the component's new position. while (++Idx < std::size(Found) && Found[Idx]) ; } while (Idx < Pos); // Add more until the final position is reached. 
} assert(Pos < Components.size() && Components[Pos] == Comp && "Component moved wrong!"); Found[Pos] = true; break; } } // If "none" is in the middle component in a three-component triple, treat it // as the OS (Components[2]) instead of the vendor (Components[1]). if (Found[0] && !Found[1] && !Found[2] && Found[3] && Components[1] == "none" && Components[2].empty()) std::swap(Components[1], Components[2]); // Replace empty components with "unknown" value. for (StringRef &C : Components) if (C.empty()) C = "unknown"; // Special case logic goes here. At this point Arch, Vendor and OS have the // correct values for the computed components. std::string NormalizedEnvironment; if (Environment == Triple::Android && Components[3].starts_with("androideabi")) { StringRef AndroidVersion = Components[3].drop_front(strlen("androideabi")); if (AndroidVersion.empty()) { Components[3] = "android"; } else { NormalizedEnvironment = Twine("android", AndroidVersion).str(); Components[3] = NormalizedEnvironment; } } // SUSE uses "gnueabi" to mean "gnueabihf" if (Vendor == Triple::SUSE && Environment == llvm::Triple::GNUEABI) Components[3] = "gnueabihf"; if (OS == Triple::Win32) { Components.resize(4); Components[2] = "windows"; if (Environment == UnknownEnvironment) { if (ObjectFormat == UnknownObjectFormat || ObjectFormat == Triple::COFF) Components[3] = "msvc"; else Components[3] = getObjectFormatTypeName(ObjectFormat); } } else if (IsMinGW32) { Components.resize(4); Components[2] = "windows"; Components[3] = "gnu"; } else if (IsCygwin) { Components.resize(4); Components[2] = "windows"; Components[3] = "cygnus"; } if (IsMinGW32 || IsCygwin || (OS == Triple::Win32 && Environment != UnknownEnvironment)) { if (ObjectFormat != UnknownObjectFormat && ObjectFormat != Triple::COFF) { Components.resize(5); Components[4] = getObjectFormatTypeName(ObjectFormat); } } // Normalize DXIL triple if it does not include DXIL version number. // Determine DXIL version number using the minor version number of Shader // Model version specified in target triple, if any. Prior to decoupling DXIL // version numbering from that of Shader Model DXIL version 1.Y corresponds to // SM 6.Y. E.g., dxilv1.Y-unknown-shadermodelX.Y-hull if (Components[0] == "dxil") { if (Components.size() > 4) { Components.resize(4); } // Add DXIL version only if shadermodel is specified in the triple if (OS == Triple::ShaderModel) { Components[0] = getDXILArchNameFromShaderModel(Components[2]); } } // Stick the corrected components back together to form the normalized string. 
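// A minimal sketch of what the component permutation in normalize() above
// produces; the first expectation matches the "a-b-i386 -> i386-a-b" example
// given in the comments, the second shows the mingw rewriting.
#include <cassert>
#include "llvm/TargetParser/Triple.h"

static void normalizeExamples() {
  // A misplaced architecture component is moved to the front.
  assert(llvm::Triple::normalize("a-b-i386") == "i386-a-b");
  // Missing components become "unknown" and the mingw spelling is rewritten
  // to the windows-gnu form.
  assert(llvm::Triple::normalize("i386-mingw32") == "i386-unknown-windows-gnu");
}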
return join(Components, "-"); } StringRef Triple::getArchName() const { return StringRef(Data).split('-').first; // Isolate first component } StringRef Triple::getVendorName() const { StringRef Tmp = StringRef(Data).split('-').second; // Strip first component return Tmp.split('-').first; // Isolate second component } StringRef Triple::getOSName() const { StringRef Tmp = StringRef(Data).split('-').second; // Strip first component Tmp = Tmp.split('-').second; // Strip second component return Tmp.split('-').first; // Isolate third component } StringRef Triple::getEnvironmentName() const { StringRef Tmp = StringRef(Data).split('-').second; // Strip first component Tmp = Tmp.split('-').second; // Strip second component return Tmp.split('-').second; // Strip third component } StringRef Triple::getOSAndEnvironmentName() const { StringRef Tmp = StringRef(Data).split('-').second; // Strip first component return Tmp.split('-').second; // Strip second component } static VersionTuple parseVersionFromName(StringRef Name) { VersionTuple Version; Version.tryParse(Name); return Version.withoutBuild(); } VersionTuple Triple::getEnvironmentVersion() const { return parseVersionFromName(getEnvironmentVersionString()); } StringRef Triple::getEnvironmentVersionString() const { StringRef EnvironmentName = getEnvironmentName(); // none is a valid environment type - it basically amounts to a freestanding // environment. if (EnvironmentName == "none") return ""; StringRef EnvironmentTypeName = getEnvironmentTypeName(getEnvironment()); EnvironmentName.consume_front(EnvironmentTypeName); if (EnvironmentName.contains("-")) { // -obj is the suffix if (getObjectFormat() != Triple::UnknownObjectFormat) { StringRef ObjectFormatTypeName = getObjectFormatTypeName(getObjectFormat()); const std::string tmp = (Twine("-") + ObjectFormatTypeName).str(); EnvironmentName.consume_back(tmp); } } return EnvironmentName; } VersionTuple Triple::getOSVersion() const { StringRef OSName = getOSName(); // Assume that the OS portion of the triple starts with the canonical name. StringRef OSTypeName = getOSTypeName(getOS()); if (OSName.starts_with(OSTypeName)) OSName = OSName.substr(OSTypeName.size()); else if (getOS() == MacOSX) OSName.consume_front("macos"); else if (OSName.starts_with("visionos")) OSName.consume_front("visionos"); return parseVersionFromName(OSName); } bool Triple::getMacOSXVersion(VersionTuple &Version) const { Version = getOSVersion(); switch (getOS()) { default: llvm_unreachable("unexpected OS for Darwin triple"); case Darwin: // Default to darwin8, i.e., MacOSX 10.4. if (Version.getMajor() == 0) Version = VersionTuple(8); // Darwin version numbers are skewed from OS X versions. if (Version.getMajor() < 4) { return false; } if (Version.getMajor() <= 19) { Version = VersionTuple(10, Version.getMajor() - 4); } else { // darwin20+ corresponds to macOS 11+. Version = VersionTuple(11 + Version.getMajor() - 20); } break; case MacOSX: // Default to 10.4. if (Version.getMajor() == 0) { Version = VersionTuple(10, 4); } else if (Version.getMajor() < 10) { return false; } break; case IOS: case TvOS: case WatchOS: // Ignore the version from the triple. This is only handled because the // the clang driver combines OS X and IOS support into a common Darwin // toolchain that wants to know the OS X version number even when targeting // IOS. 
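// Worked examples of the Darwin-to-macOS mapping implemented just above
// (illustrative, derived from the code as written):
//   *-apple-darwin17  -> macOS 10.13  (17 - 4 = 13, the <= 19 branch)
//   *-apple-darwin20  -> macOS 11     (11 + 20 - 20, the > 19 branch)
//   *-apple-macosx    -> macOS 10.4   (default when no version is given)
// iOS, tvOS and watchOS triples ignore their own version and are given the
// fixed 10.4 set right below.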
Version = VersionTuple(10, 4); break; case XROS: llvm_unreachable("OSX version isn't relevant for xrOS"); case DriverKit: llvm_unreachable("OSX version isn't relevant for DriverKit"); } return true; } VersionTuple Triple::getiOSVersion() const { switch (getOS()) { default: llvm_unreachable("unexpected OS for Darwin triple"); case Darwin: case MacOSX: // Ignore the version from the triple. This is only handled because the // the clang driver combines OS X and IOS support into a common Darwin // toolchain that wants to know the iOS version number even when targeting // OS X. return VersionTuple(5); case IOS: case TvOS: { VersionTuple Version = getOSVersion(); // Default to 5.0 (or 7.0 for arm64). if (Version.getMajor() == 0) return (getArch() == aarch64) ? VersionTuple(7) : VersionTuple(5); return Version; } case XROS: { // xrOS 1 is aligned with iOS 17. VersionTuple Version = getOSVersion(); return Version.withMajorReplaced(Version.getMajor() + 16); } case WatchOS: llvm_unreachable("conflicting triple info"); case DriverKit: llvm_unreachable("DriverKit doesn't have an iOS version"); } } VersionTuple Triple::getWatchOSVersion() const { switch (getOS()) { default: llvm_unreachable("unexpected OS for Darwin triple"); case Darwin: case MacOSX: // Ignore the version from the triple. This is only handled because the // the clang driver combines OS X and IOS support into a common Darwin // toolchain that wants to know the iOS version number even when targeting // OS X. return VersionTuple(2); case WatchOS: { VersionTuple Version = getOSVersion(); if (Version.getMajor() == 0) return VersionTuple(2); return Version; } case IOS: llvm_unreachable("conflicting triple info"); case XROS: llvm_unreachable("watchOS version isn't relevant for xrOS"); case DriverKit: llvm_unreachable("DriverKit doesn't have a WatchOS version"); } } VersionTuple Triple::getDriverKitVersion() const { switch (getOS()) { default: llvm_unreachable("unexpected OS for Darwin triple"); case DriverKit: VersionTuple Version = getOSVersion(); if (Version.getMajor() == 0) return Version.withMajorReplaced(19); return Version; } } VersionTuple Triple::getVulkanVersion() const { if (getArch() != spirv || getOS() != Vulkan) llvm_unreachable("invalid Vulkan SPIR-V triple"); VersionTuple VulkanVersion = getOSVersion(); SubArchType SpirvVersion = getSubArch(); llvm::DenseMap ValidVersionMap = { // Vulkan 1.2 -> SPIR-V 1.5. {VersionTuple(1, 2), SPIRVSubArch_v15}, // Vulkan 1.3 -> SPIR-V 1.6. {VersionTuple(1, 3), SPIRVSubArch_v16}}; // If Vulkan version is unset, default to 1.2. if (VulkanVersion == VersionTuple(0)) VulkanVersion = VersionTuple(1, 2); if (ValidVersionMap.contains(VulkanVersion) && (ValidVersionMap.lookup(VulkanVersion) == SpirvVersion || SpirvVersion == NoSubArch)) return VulkanVersion; return VersionTuple(0); } VersionTuple Triple::getDXILVersion() const { if (getArch() != dxil || getOS() != ShaderModel) llvm_unreachable("invalid DXIL triple"); StringRef Arch = getArchName(); if (getSubArch() == NoSubArch) Arch = getDXILArchNameFromShaderModel(getOSName()); Arch.consume_front("dxilv"); VersionTuple DXILVersion = parseVersionFromName(Arch); // FIXME: validate DXIL version against Shader Model version. 
// Tracked by https://github.com/llvm/llvm-project/issues/91388 return DXILVersion; } void Triple::setTriple(const Twine &Str) { *this = Triple(Str); } void Triple::setArch(ArchType Kind, SubArchType SubArch) { setArchName(getArchName(Kind, SubArch)); } void Triple::setVendor(VendorType Kind) { setVendorName(getVendorTypeName(Kind)); } void Triple::setOS(OSType Kind) { setOSName(getOSTypeName(Kind)); } void Triple::setEnvironment(EnvironmentType Kind) { if (ObjectFormat == getDefaultFormat(*this)) return setEnvironmentName(getEnvironmentTypeName(Kind)); setEnvironmentName((getEnvironmentTypeName(Kind) + Twine("-") + getObjectFormatTypeName(ObjectFormat)).str()); } void Triple::setObjectFormat(ObjectFormatType Kind) { if (Environment == UnknownEnvironment) return setEnvironmentName(getObjectFormatTypeName(Kind)); setEnvironmentName((getEnvironmentTypeName(Environment) + Twine("-") + getObjectFormatTypeName(Kind)).str()); } void Triple::setArchName(StringRef Str) { // Work around a miscompilation bug for Twines in gcc 4.0.3. SmallString<64> Triple; Triple += Str; Triple += "-"; Triple += getVendorName(); Triple += "-"; Triple += getOSAndEnvironmentName(); setTriple(Triple); } void Triple::setVendorName(StringRef Str) { setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName()); } void Triple::setOSName(StringRef Str) { if (hasEnvironment()) setTriple(getArchName() + "-" + getVendorName() + "-" + Str + "-" + getEnvironmentName()); else setTriple(getArchName() + "-" + getVendorName() + "-" + Str); } void Triple::setEnvironmentName(StringRef Str) { setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() + "-" + Str); } void Triple::setOSAndEnvironmentName(StringRef Str) { setTriple(getArchName() + "-" + getVendorName() + "-" + Str); } unsigned Triple::getArchPointerBitWidth(llvm::Triple::ArchType Arch) { switch (Arch) { case llvm::Triple::UnknownArch: return 0; case llvm::Triple::avr: case llvm::Triple::msp430: return 16; case llvm::Triple::aarch64_32: case llvm::Triple::amdil: case llvm::Triple::arc: case llvm::Triple::arm: case llvm::Triple::armeb: case llvm::Triple::csky: case llvm::Triple::dxil: case llvm::Triple::hexagon: case llvm::Triple::hsail: case llvm::Triple::kalimba: case llvm::Triple::lanai: case llvm::Triple::le32: case llvm::Triple::loongarch32: case llvm::Triple::m68k: case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::nvptx: case llvm::Triple::ppc: case llvm::Triple::ppcle: case llvm::Triple::r600: case llvm::Triple::renderscript32: case llvm::Triple::riscv32: case llvm::Triple::shave: case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::spir: case llvm::Triple::spirv32: case llvm::Triple::tce: case llvm::Triple::tcele: case llvm::Triple::thumb: case llvm::Triple::thumbeb: case llvm::Triple::wasm32: case llvm::Triple::x86: case llvm::Triple::xcore: case llvm::Triple::xtensa: return 32; case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: case llvm::Triple::amdgcn: case llvm::Triple::amdil64: case llvm::Triple::bpfeb: case llvm::Triple::bpfel: case llvm::Triple::hsail64: case llvm::Triple::le64: case llvm::Triple::loongarch64: case llvm::Triple::mips64: case llvm::Triple::mips64el: case llvm::Triple::nvptx64: case llvm::Triple::ppc64: case llvm::Triple::ppc64le: case llvm::Triple::renderscript64: case llvm::Triple::riscv64: case llvm::Triple::sparcv9: case llvm::Triple::spirv: case llvm::Triple::spir64: case llvm::Triple::spirv64: case llvm::Triple::systemz: case llvm::Triple::ve: case llvm::Triple::wasm64: 
case llvm::Triple::x86_64: return 64; } llvm_unreachable("Invalid architecture value"); } bool Triple::isArch64Bit() const { return getArchPointerBitWidth(getArch()) == 64; } bool Triple::isArch32Bit() const { return getArchPointerBitWidth(getArch()) == 32; } bool Triple::isArch16Bit() const { return getArchPointerBitWidth(getArch()) == 16; } Triple Triple::get32BitArchVariant() const { Triple T(*this); switch (getArch()) { case Triple::UnknownArch: case Triple::amdgcn: case Triple::avr: case Triple::bpfeb: case Triple::bpfel: case Triple::msp430: case Triple::systemz: case Triple::ve: T.setArch(UnknownArch); break; case Triple::aarch64_32: case Triple::amdil: case Triple::arc: case Triple::arm: case Triple::armeb: case Triple::csky: case Triple::dxil: case Triple::hexagon: case Triple::hsail: case Triple::kalimba: case Triple::lanai: case Triple::le32: case Triple::loongarch32: case Triple::m68k: case Triple::mips: case Triple::mipsel: case Triple::nvptx: case Triple::ppc: case Triple::ppcle: case Triple::r600: case Triple::renderscript32: case Triple::riscv32: case Triple::shave: case Triple::sparc: case Triple::sparcel: case Triple::spir: case Triple::spirv32: case Triple::tce: case Triple::tcele: case Triple::thumb: case Triple::thumbeb: case Triple::wasm32: case Triple::x86: case Triple::xcore: case Triple::xtensa: // Already 32-bit. break; case Triple::aarch64: T.setArch(Triple::arm); break; case Triple::aarch64_be: T.setArch(Triple::armeb); break; case Triple::amdil64: T.setArch(Triple::amdil); break; case Triple::hsail64: T.setArch(Triple::hsail); break; case Triple::le64: T.setArch(Triple::le32); break; case Triple::loongarch64: T.setArch(Triple::loongarch32); break; case Triple::mips64: T.setArch(Triple::mips, getSubArch()); break; case Triple::mips64el: T.setArch(Triple::mipsel, getSubArch()); break; case Triple::nvptx64: T.setArch(Triple::nvptx); break; case Triple::ppc64: T.setArch(Triple::ppc); break; case Triple::ppc64le: T.setArch(Triple::ppcle); break; case Triple::renderscript64: T.setArch(Triple::renderscript32); break; case Triple::riscv64: T.setArch(Triple::riscv32); break; case Triple::sparcv9: T.setArch(Triple::sparc); break; case Triple::spir64: T.setArch(Triple::spir); break; case Triple::spirv: case Triple::spirv64: T.setArch(Triple::spirv32, getSubArch()); break; case Triple::wasm64: T.setArch(Triple::wasm32); break; case Triple::x86_64: T.setArch(Triple::x86); break; } return T; } Triple Triple::get64BitArchVariant() const { Triple T(*this); switch (getArch()) { case Triple::UnknownArch: case Triple::arc: case Triple::avr: case Triple::csky: case Triple::dxil: case Triple::hexagon: case Triple::kalimba: case Triple::lanai: case Triple::m68k: case Triple::msp430: case Triple::r600: case Triple::shave: case Triple::sparcel: case Triple::tce: case Triple::tcele: case Triple::xcore: case Triple::xtensa: T.setArch(UnknownArch); break; case Triple::aarch64: case Triple::aarch64_be: case Triple::amdgcn: case Triple::amdil64: case Triple::bpfeb: case Triple::bpfel: case Triple::hsail64: case Triple::le64: case Triple::loongarch64: case Triple::mips64: case Triple::mips64el: case Triple::nvptx64: case Triple::ppc64: case Triple::ppc64le: case Triple::renderscript64: case Triple::riscv64: case Triple::sparcv9: case Triple::spir64: case Triple::spirv64: case Triple::systemz: case Triple::ve: case Triple::wasm64: case Triple::x86_64: // Already 64-bit. 
break; case Triple::aarch64_32: T.setArch(Triple::aarch64); break; case Triple::amdil: T.setArch(Triple::amdil64); break; case Triple::arm: T.setArch(Triple::aarch64); break; case Triple::armeb: T.setArch(Triple::aarch64_be); break; case Triple::hsail: T.setArch(Triple::hsail64); break; case Triple::le32: T.setArch(Triple::le64); break; case Triple::loongarch32: T.setArch(Triple::loongarch64); break; case Triple::mips: T.setArch(Triple::mips64, getSubArch()); break; case Triple::mipsel: T.setArch(Triple::mips64el, getSubArch()); break; case Triple::nvptx: T.setArch(Triple::nvptx64); break; case Triple::ppc: T.setArch(Triple::ppc64); break; case Triple::ppcle: T.setArch(Triple::ppc64le); break; case Triple::renderscript32: T.setArch(Triple::renderscript64); break; case Triple::riscv32: T.setArch(Triple::riscv64); break; case Triple::sparc: T.setArch(Triple::sparcv9); break; case Triple::spir: T.setArch(Triple::spir64); break; case Triple::spirv: case Triple::spirv32: T.setArch(Triple::spirv64, getSubArch()); break; case Triple::thumb: T.setArch(Triple::aarch64); break; case Triple::thumbeb: T.setArch(Triple::aarch64_be); break; case Triple::wasm32: T.setArch(Triple::wasm64); break; case Triple::x86: T.setArch(Triple::x86_64); break; } return T; } Triple Triple::getBigEndianArchVariant() const { Triple T(*this); // Already big endian. if (!isLittleEndian()) return T; switch (getArch()) { case Triple::UnknownArch: case Triple::amdgcn: case Triple::amdil64: case Triple::amdil: case Triple::avr: case Triple::dxil: case Triple::hexagon: case Triple::hsail64: case Triple::hsail: case Triple::kalimba: case Triple::le32: case Triple::le64: case Triple::loongarch32: case Triple::loongarch64: case Triple::msp430: case Triple::nvptx64: case Triple::nvptx: case Triple::r600: case Triple::renderscript32: case Triple::renderscript64: case Triple::riscv32: case Triple::riscv64: case Triple::shave: case Triple::spir64: case Triple::spir: case Triple::spirv: case Triple::spirv32: case Triple::spirv64: case Triple::wasm32: case Triple::wasm64: case Triple::x86: case Triple::x86_64: case Triple::xcore: case Triple::ve: case Triple::csky: case Triple::xtensa: // ARM is intentionally unsupported here, changing the architecture would // drop any arch suffixes. case Triple::arm: case Triple::thumb: T.setArch(UnknownArch); break; case Triple::aarch64: T.setArch(Triple::aarch64_be); break; case Triple::bpfel: T.setArch(Triple::bpfeb); break; case Triple::mips64el: T.setArch(Triple::mips64, getSubArch()); break; case Triple::mipsel: T.setArch(Triple::mips, getSubArch()); break; case Triple::ppcle: T.setArch(Triple::ppc); break; case Triple::ppc64le: T.setArch(Triple::ppc64); break; case Triple::sparcel: T.setArch(Triple::sparc); break; case Triple::tcele: T.setArch(Triple::tce); break; default: llvm_unreachable("getBigEndianArchVariant: unknown triple."); } return T; } Triple Triple::getLittleEndianArchVariant() const { Triple T(*this); if (isLittleEndian()) return T; switch (getArch()) { case Triple::UnknownArch: case Triple::lanai: case Triple::sparcv9: case Triple::systemz: case Triple::m68k: // ARM is intentionally unsupported here, changing the architecture would // drop any arch suffixes. 
case Triple::armeb: case Triple::thumbeb: T.setArch(UnknownArch); break; case Triple::aarch64_be: T.setArch(Triple::aarch64); break; case Triple::bpfeb: T.setArch(Triple::bpfel); break; case Triple::mips64: T.setArch(Triple::mips64el, getSubArch()); break; case Triple::mips: T.setArch(Triple::mipsel, getSubArch()); break; case Triple::ppc: T.setArch(Triple::ppcle); break; case Triple::ppc64: T.setArch(Triple::ppc64le); break; case Triple::sparc: T.setArch(Triple::sparcel); break; case Triple::tce: T.setArch(Triple::tcele); break; default: llvm_unreachable("getLittleEndianArchVariant: unknown triple."); } return T; } bool Triple::isLittleEndian() const { switch (getArch()) { case Triple::aarch64: case Triple::aarch64_32: case Triple::amdgcn: case Triple::amdil64: case Triple::amdil: case Triple::arm: case Triple::avr: case Triple::bpfel: case Triple::csky: case Triple::dxil: case Triple::hexagon: case Triple::hsail64: case Triple::hsail: case Triple::kalimba: case Triple::le32: case Triple::le64: case Triple::loongarch32: case Triple::loongarch64: case Triple::mips64el: case Triple::mipsel: case Triple::msp430: case Triple::nvptx64: case Triple::nvptx: case Triple::ppcle: case Triple::ppc64le: case Triple::r600: case Triple::renderscript32: case Triple::renderscript64: case Triple::riscv32: case Triple::riscv64: case Triple::shave: case Triple::sparcel: case Triple::spir64: case Triple::spir: case Triple::spirv: case Triple::spirv32: case Triple::spirv64: case Triple::tcele: case Triple::thumb: case Triple::ve: case Triple::wasm32: case Triple::wasm64: case Triple::x86: case Triple::x86_64: case Triple::xcore: case Triple::xtensa: return true; default: return false; } } bool Triple::isCompatibleWith(const Triple &Other) const { // ARM and Thumb triples are compatible, if subarch, vendor and OS match. if ((getArch() == Triple::thumb && Other.getArch() == Triple::arm) || (getArch() == Triple::arm && Other.getArch() == Triple::thumb) || (getArch() == Triple::thumbeb && Other.getArch() == Triple::armeb) || (getArch() == Triple::armeb && Other.getArch() == Triple::thumbeb)) { if (getVendor() == Triple::Apple) return getSubArch() == Other.getSubArch() && getVendor() == Other.getVendor() && getOS() == Other.getOS(); else return getSubArch() == Other.getSubArch() && getVendor() == Other.getVendor() && getOS() == Other.getOS() && getEnvironment() == Other.getEnvironment() && getObjectFormat() == Other.getObjectFormat(); } // If vendor is apple, ignore the version number. if (getVendor() == Triple::Apple) return getArch() == Other.getArch() && getSubArch() == Other.getSubArch() && getVendor() == Other.getVendor() && getOS() == Other.getOS(); return *this == Other; } std::string Triple::merge(const Triple &Other) const { // If vendor is apple, pick the triple with the larger version number. if (getVendor() == Triple::Apple) if (Other.isOSVersionLT(*this)) return str(); return Other.str(); } bool Triple::isMacOSXVersionLT(unsigned Major, unsigned Minor, unsigned Micro) const { assert(isMacOSX() && "Not an OS X triple!"); // If this is OS X, expect a sane version number. if (getOS() == Triple::MacOSX) return isOSVersionLT(Major, Minor, Micro); // Otherwise, compare to the "Darwin" number. 
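// The arithmetic that follows mirrors getMacOSXVersion(): for a Darwin-style
// triple, a requested macOS 10.x corresponds to darwin(x + 4), and a
// requested macOS M with M >= 11 corresponds to darwin(M + 9). For example
// (expected behaviour with an illustrative triple):
//   Triple("x86_64-apple-darwin21").isMacOSXVersionLT(12, 1)  -> true
//   Triple("x86_64-apple-darwin21").isMacOSXVersionLT(12, 0)  -> false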
if (Major == 10) { return isOSVersionLT(Minor + 4, Micro, 0); } else { assert(Major >= 11 && "Unexpected major version"); return isOSVersionLT(Major - 11 + 20, Minor, Micro); } } VersionTuple Triple::getMinimumSupportedOSVersion() const { if (getVendor() != Triple::Apple || getArch() != Triple::aarch64) return VersionTuple(); switch (getOS()) { case Triple::MacOSX: // ARM64 slice is supported starting from macOS 11.0+. return VersionTuple(11, 0, 0); case Triple::IOS: // ARM64 slice is supported starting from Mac Catalyst 14 (macOS 11). // ARM64 simulators are supported for iOS 14+. if (isMacCatalystEnvironment() || isSimulatorEnvironment()) return VersionTuple(14, 0, 0); // ARM64e slice is supported starting from iOS 14. if (isArm64e()) return VersionTuple(14, 0, 0); break; case Triple::TvOS: // ARM64 simulators are supported for tvOS 14+. if (isSimulatorEnvironment()) return VersionTuple(14, 0, 0); break; case Triple::WatchOS: // ARM64 simulators are supported for watchOS 7+. if (isSimulatorEnvironment()) return VersionTuple(7, 0, 0); break; case Triple::DriverKit: return VersionTuple(20, 0, 0); default: break; } return VersionTuple(); } VersionTuple Triple::getCanonicalVersionForOS(OSType OSKind, const VersionTuple &Version) { switch (OSKind) { case MacOSX: // macOS 10.16 is canonicalized to macOS 11. if (Version == VersionTuple(10, 16)) return VersionTuple(11, 0); [[fallthrough]]; default: return Version; } } // HLSL triple environment orders are relied on in the front end static_assert(Triple::Vertex - Triple::Pixel == 1, "incorrect HLSL stage order"); static_assert(Triple::Geometry - Triple::Pixel == 2, "incorrect HLSL stage order"); static_assert(Triple::Hull - Triple::Pixel == 3, "incorrect HLSL stage order"); static_assert(Triple::Domain - Triple::Pixel == 4, "incorrect HLSL stage order"); static_assert(Triple::Compute - Triple::Pixel == 5, "incorrect HLSL stage order"); static_assert(Triple::Library - Triple::Pixel == 6, "incorrect HLSL stage order"); static_assert(Triple::RayGeneration - Triple::Pixel == 7, "incorrect HLSL stage order"); static_assert(Triple::Intersection - Triple::Pixel == 8, "incorrect HLSL stage order"); static_assert(Triple::AnyHit - Triple::Pixel == 9, "incorrect HLSL stage order"); static_assert(Triple::ClosestHit - Triple::Pixel == 10, "incorrect HLSL stage order"); static_assert(Triple::Miss - Triple::Pixel == 11, "incorrect HLSL stage order"); static_assert(Triple::Callable - Triple::Pixel == 12, "incorrect HLSL stage order"); static_assert(Triple::Mesh - Triple::Pixel == 13, "incorrect HLSL stage order"); static_assert(Triple::Amplification - Triple::Pixel == 14, "incorrect HLSL stage order"); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp index 68696789530f..fda1c22cc1fb 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1,3074 +1,3074 @@ //===- InlineFunction.cpp - Code to perform function inlining -------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements inlining of a function into a call site, resolving // parameters and the return value as appropriate. 
// //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryProfileInfo.h" #include "llvm/Analysis/ObjCARCAnalysisUtils.h" #include "llvm/Analysis/ObjCARCUtil.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Argument.h" #include "llvm/IR/AttributeMask.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/EHPersonalities.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include #include #include #include #include #include #include #include #define DEBUG_TYPE "inline-function" using namespace llvm; using namespace llvm::memprof; using ProfileCount = Function::ProfileCount; static cl::opt EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), cl::Hidden, cl::desc("Convert noalias attributes to metadata during inlining.")); static cl::opt UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden, cl::init(true), cl::desc("Use the llvm.experimental.noalias.scope.decl " "intrinsic during inlining.")); // Disabled by default, because the added alignment assumptions may increase // compile-time and block optimizations. This option is not suitable for use // with frontends that emit comprehensive parameter alignment annotations. static cl::opt PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", cl::init(false), cl::Hidden, cl::desc("Convert align attributes to assumptions during inlining.")); static cl::opt InlinerAttributeWindow( "max-inst-checked-for-throw-during-inlining", cl::Hidden, cl::desc("the maximum number of instructions analyzed for may throw during " "attribute inference in inlined body"), cl::init(4)); namespace { /// A class for recording information about inlining a landing pad. class LandingPadInliningInfo { /// Destination of the invoke's unwind. 
BasicBlock *OuterResumeDest; /// Destination for the callee's resume. BasicBlock *InnerResumeDest = nullptr; /// LandingPadInst associated with the invoke. LandingPadInst *CallerLPad = nullptr; /// PHI for EH values from landingpad insts. PHINode *InnerEHValuesPHI = nullptr; SmallVector UnwindDestPHIValues; public: LandingPadInliningInfo(InvokeInst *II) : OuterResumeDest(II->getUnwindDest()) { // If there are PHI nodes in the unwind destination block, we need to keep // track of which values came into them from the invoke before removing // the edge from this block. BasicBlock *InvokeBB = II->getParent(); BasicBlock::iterator I = OuterResumeDest->begin(); for (; isa(I); ++I) { // Save the value to use for this edge. PHINode *PHI = cast(I); UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); } CallerLPad = cast(I); } /// The outer unwind destination is the target of /// unwind edges introduced for calls within the inlined function. BasicBlock *getOuterResumeDest() const { return OuterResumeDest; } BasicBlock *getInnerResumeDest(); LandingPadInst *getLandingPadInst() const { return CallerLPad; } /// Forward the 'resume' instruction to the caller's landing pad block. /// When the landing pad block has only one predecessor, this is /// a simple branch. When there is more than one predecessor, we need to /// split the landing pad block after the landingpad instruction and jump /// to there. void forwardResume(ResumeInst *RI, SmallPtrSetImpl &InlinedLPads); /// Add incoming-PHI values to the unwind destination block for the given /// basic block, using the values for the original invoke's source block. void addIncomingPHIValuesFor(BasicBlock *BB) const { addIncomingPHIValuesForInto(BB, OuterResumeDest); } void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const { BasicBlock::iterator I = dest->begin(); for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { PHINode *phi = cast(I); phi->addIncoming(UnwindDestPHIValues[i], src); } } }; } // end anonymous namespace /// Get or create a target for the branch from ResumeInsts. BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; // Split the landing pad. BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); InnerResumeDest = OuterResumeDest->splitBasicBlock(SplitPoint, OuterResumeDest->getName() + ".body"); // The number of incoming edges we expect to the inner landing pad. const unsigned PHICapacity = 2; // Create corresponding new PHIs for all the PHIs in the outer landing pad. BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); BasicBlock::iterator I = OuterResumeDest->begin(); for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { PHINode *OuterPHI = cast(I); PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, OuterPHI->getName() + ".lpad-body"); InnerPHI->insertBefore(InsertPoint); OuterPHI->replaceAllUsesWith(InnerPHI); InnerPHI->addIncoming(OuterPHI, OuterResumeDest); } // Create a PHI for the exception values. InnerEHValuesPHI = PHINode::Create(CallerLPad->getType(), PHICapacity, "eh.lpad-body"); InnerEHValuesPHI->insertBefore(InsertPoint); CallerLPad->replaceAllUsesWith(InnerEHValuesPHI); InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest); // All done. return InnerResumeDest; } /// Forward the 'resume' instruction to the caller's landing pad block. /// When the landing pad block has only one predecessor, this is a simple /// branch. 
When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. void LandingPadInliningInfo::forwardResume( ResumeInst *RI, SmallPtrSetImpl &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); BasicBlock *Src = RI->getParent(); BranchInst::Create(Dest, Src); // Update the PHIs in the destination. They were inserted in an order which // makes this work. addIncomingPHIValuesForInto(Src, Dest); InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src); RI->eraseFromParent(); } /// Helper for getUnwindDestToken/getUnwindDestTokenHelper. static Value *getParentPad(Value *EHPad) { if (auto *FPI = dyn_cast(EHPad)) return FPI->getParentPad(); return cast(EHPad)->getParentPad(); } using UnwindDestMemoTy = DenseMap; /// Helper for getUnwindDestToken that does the descendant-ward part of /// the search. static Value *getUnwindDestTokenHelper(Instruction *EHPad, UnwindDestMemoTy &MemoMap) { SmallVector Worklist(1, EHPad); while (!Worklist.empty()) { Instruction *CurrentPad = Worklist.pop_back_val(); // We only put pads on the worklist that aren't in the MemoMap. When // we find an unwind dest for a pad we may update its ancestors, but // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, // so they should never get updated while queued on the worklist. assert(!MemoMap.count(CurrentPad)); Value *UnwindDestToken = nullptr; if (auto *CatchSwitch = dyn_cast(CurrentPad)) { if (CatchSwitch->hasUnwindDest()) { UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI(); } else { // Catchswitch doesn't have a 'nounwind' variant, and one might be // annotated as "unwinds to caller" when really it's nounwind (see // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the // parent's unwind dest from this. We can check its catchpads' // descendants, since they might include a cleanuppad with an // "unwinds to caller" cleanupret, which can be trusted. for (auto HI = CatchSwitch->handler_begin(), HE = CatchSwitch->handler_end(); HI != HE && !UnwindDestToken; ++HI) { BasicBlock *HandlerBlock = *HI; auto *CatchPad = cast(HandlerBlock->getFirstNonPHI()); for (User *Child : CatchPad->users()) { // Intentionally ignore invokes here -- since the catchswitch is // marked "unwind to caller", it would be a verifier error if it // contained an invoke which unwinds out of it, so any invoke we'd // encounter must unwind to some child of the catch. if (!isa(Child) && !isa(Child)) continue; Instruction *ChildPad = cast(Child); auto Memo = MemoMap.find(ChildPad); if (Memo == MemoMap.end()) { // Haven't figured out this child pad yet; queue it. Worklist.push_back(ChildPad); continue; } // We've already checked this child, but might have found that // it offers no proof either way. Value *ChildUnwindDestToken = Memo->second; if (!ChildUnwindDestToken) continue; // We already know the child's unwind dest, which can either // be ConstantTokenNone to indicate unwind to caller, or can // be another child of the catchpad. Only the former indicates // the unwind dest of the catchswitch. 
if (isa(ChildUnwindDestToken)) { UnwindDestToken = ChildUnwindDestToken; break; } assert(getParentPad(ChildUnwindDestToken) == CatchPad); } } } } else { auto *CleanupPad = cast(CurrentPad); for (User *U : CleanupPad->users()) { if (auto *CleanupRet = dyn_cast(U)) { if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) UnwindDestToken = RetUnwindDest->getFirstNonPHI(); else UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext()); break; } Value *ChildUnwindDestToken; if (auto *Invoke = dyn_cast(U)) { ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI(); } else if (isa(U) || isa(U)) { Instruction *ChildPad = cast(U); auto Memo = MemoMap.find(ChildPad); if (Memo == MemoMap.end()) { // Haven't resolved this child yet; queue it and keep searching. Worklist.push_back(ChildPad); continue; } // We've checked this child, but still need to ignore it if it // had no proof either way. ChildUnwindDestToken = Memo->second; if (!ChildUnwindDestToken) continue; } else { // Not a relevant user of the cleanuppad continue; } // In a well-formed program, the child/invoke must either unwind to // an(other) child of the cleanup, or exit the cleanup. In the // first case, continue searching. if (isa(ChildUnwindDestToken) && getParentPad(ChildUnwindDestToken) == CleanupPad) continue; UnwindDestToken = ChildUnwindDestToken; break; } } // If we haven't found an unwind dest for CurrentPad, we may have queued its // children, so move on to the next in the worklist. if (!UnwindDestToken) continue; // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits // any ancestors of CurrentPad up to but not including UnwindDestToken's // parent pad. Record this in the memo map, and check to see if the // original EHPad being queried is one of the ones exited. Value *UnwindParent; if (auto *UnwindPad = dyn_cast(UnwindDestToken)) UnwindParent = getParentPad(UnwindPad); else UnwindParent = nullptr; bool ExitedOriginalPad = false; for (Instruction *ExitedPad = CurrentPad; ExitedPad && ExitedPad != UnwindParent; ExitedPad = dyn_cast(getParentPad(ExitedPad))) { // Skip over catchpads since they just follow their catchswitches. if (isa(ExitedPad)) continue; MemoMap[ExitedPad] = UnwindDestToken; ExitedOriginalPad |= (ExitedPad == EHPad); } if (ExitedOriginalPad) return UnwindDestToken; // Continue the search. } // No definitive information is contained within this funclet. return nullptr; } /// Given an EH pad, find where it unwinds. If it unwinds to an EH pad, /// return that pad instruction. If it unwinds to caller, return /// ConstantTokenNone. If it does not have a definitive unwind destination, /// return nullptr. /// /// This routine gets invoked for calls in funclets in inlinees when inlining /// an invoke. Since many funclets don't have calls inside them, it's queried /// on-demand rather than building a map of pads to unwind dests up front. /// Determining a funclet's unwind dest may require recursively searching its /// descendants, and also ancestors and cousins if the descendants don't provide /// an answer. Since most funclets will have their unwind dest immediately /// available as the unwind dest of a catchswitch or cleanupret, this routine /// searches top-down from the given pad and then up. To avoid worst-case /// quadratic run-time given that approach, it uses a memo map to avoid /// re-processing funclet trees. 
The callers that rewrite the IR as they go /// take advantage of this, for correctness, by checking/forcing rewritten /// pads' entries to match the original callee view. static Value *getUnwindDestToken(Instruction *EHPad, UnwindDestMemoTy &MemoMap) { // Catchpads unwind to the same place as their catchswitch; // redirct any queries on catchpads so the code below can // deal with just catchswitches and cleanuppads. if (auto *CPI = dyn_cast(EHPad)) EHPad = CPI->getCatchSwitch(); // Check if we've already determined the unwind dest for this pad. auto Memo = MemoMap.find(EHPad); if (Memo != MemoMap.end()) return Memo->second; // Search EHPad and, if necessary, its descendants. Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); if (UnwindDestToken) return UnwindDestToken; // No information is available for this EHPad from itself or any of its // descendants. An unwind all the way out to a pad in the caller would // need also to agree with the unwind dest of the parent funclet, so // search up the chain to try to find a funclet with information. Put // null entries in the memo map to avoid re-processing as we go up. MemoMap[EHPad] = nullptr; #ifndef NDEBUG SmallPtrSet TempMemos; TempMemos.insert(EHPad); #endif Instruction *LastUselessPad = EHPad; Value *AncestorToken; for (AncestorToken = getParentPad(EHPad); auto *AncestorPad = dyn_cast(AncestorToken); AncestorToken = getParentPad(AncestorToken)) { // Skip over catchpads since they just follow their catchswitches. if (isa(AncestorPad)) continue; // If the MemoMap had an entry mapping AncestorPad to nullptr, since we // haven't yet called getUnwindDestTokenHelper for AncestorPad in this // call to getUnwindDestToken, that would mean that AncestorPad had no // information in itself, its descendants, or its ancestors. If that // were the case, then we should also have recorded the lack of information // for the descendant that we're coming from. So assert that we don't // find a null entry in the MemoMap for AncestorPad. assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); auto AncestorMemo = MemoMap.find(AncestorPad); if (AncestorMemo == MemoMap.end()) { UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap); } else { UnwindDestToken = AncestorMemo->second; } if (UnwindDestToken) break; LastUselessPad = AncestorPad; MemoMap[LastUselessPad] = nullptr; #ifndef NDEBUG TempMemos.insert(LastUselessPad); #endif } // We know that getUnwindDestTokenHelper was called on LastUselessPad and // returned nullptr (and likewise for EHPad and any of its ancestors up to // LastUselessPad), so LastUselessPad has no information from below. Since // getUnwindDestTokenHelper must investigate all downward paths through // no-information nodes to prove that a node has no information like this, // and since any time it finds information it records it in the MemoMap for // not just the immediately-containing funclet but also any ancestors also // exited, it must be the case that, walking downward from LastUselessPad, // visiting just those nodes which have not been mapped to an unwind dest // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since // they are just used to keep getUnwindDestTokenHelper from repeating work), // any node visited must have been exhaustively searched with no information // for it found. 
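// Illustrative IR (schematic) of the two definitive answers the walk can
// read off a cleanup funclet:
//
//   %cp = cleanuppad within none []
//   cleanupret from %cp unwind label %next.pad   ; unwind dest = %next.pad's EH pad
//
//   %cp2 = cleanuppad within none []
//   cleanupret from %cp2 unwind to caller        ; unwind dest token = ConstantTokenNone
//
// A catchswitch annotated "unwind to caller" proves nothing by itself (it may
// really be nounwind), which is why the search also has to consult its
// descendants and, failing that, its ancestors.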
SmallVector Worklist(1, LastUselessPad); while (!Worklist.empty()) { Instruction *UselessPad = Worklist.pop_back_val(); auto Memo = MemoMap.find(UselessPad); if (Memo != MemoMap.end() && Memo->second) { // Here the name 'UselessPad' is a bit of a misnomer, because we've found // that it is a funclet that does have information about unwinding to // a particular destination; its parent was a useless pad. // Since its parent has no information, the unwind edge must not escape // the parent, and must target a sibling of this pad. This local unwind // gives us no information about EHPad. Leave it and the subtree rooted // at it alone. assert(getParentPad(Memo->second) == getParentPad(UselessPad)); continue; } // We know we don't have information for UselesPad. If it has an entry in // the MemoMap (mapping it to nullptr), it must be one of the TempMemos // added on this invocation of getUnwindDestToken; if a previous invocation // recorded nullptr, it would have had to prove that the ancestors of // UselessPad, which include LastUselessPad, had no information, and that // in turn would have required proving that the descendants of // LastUselesPad, which include EHPad, have no information about // LastUselessPad, which would imply that EHPad was mapped to nullptr in // the MemoMap on that invocation, which isn't the case if we got here. assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad)); // Assert as we enumerate users that 'UselessPad' doesn't have any unwind // information that we'd be contradicting by making a map entry for it // (which is something that getUnwindDestTokenHelper must have proved for // us to get here). Just assert on is direct users here; the checks in // this downward walk at its descendants will verify that they don't have // any unwind edges that exit 'UselessPad' either (i.e. they either have no // unwind edges or unwind to a sibling). MemoMap[UselessPad] = UnwindDestToken; if (auto *CatchSwitch = dyn_cast(UselessPad)) { assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad"); for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) { auto *CatchPad = HandlerBlock->getFirstNonPHI(); for (User *U : CatchPad->users()) { assert( (!isa(U) || (getParentPad( cast(U)->getUnwindDest()->getFirstNonPHI()) == CatchPad)) && "Expected useless pad"); if (isa(U) || isa(U)) Worklist.push_back(cast(U)); } } } else { assert(isa(UselessPad)); for (User *U : UselessPad->users()) { assert(!isa(U) && "Expected useless pad"); assert((!isa(U) || (getParentPad( cast(U)->getUnwindDest()->getFirstNonPHI()) == UselessPad)) && "Expected useless pad"); if (isa(U) || isa(U)) Worklist.push_back(cast(U)); } } } return UnwindDestToken; } /// When we inline a basic block into an invoke, /// we have to turn all of the calls that can throw into invokes. /// This function analyze BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( BasicBlock *BB, BasicBlock *UnwindEdge, UnwindDestMemoTy *FuncletUnwindMap = nullptr) { for (Instruction &I : llvm::make_early_inc_range(*BB)) { // We only need to check for function calls: inlined invoke // instructions require no special handling. CallInst *CI = dyn_cast(&I); if (!CI || CI->doesNotThrow()) continue; // We do not need to (and in fact, cannot) convert possibly throwing calls // to @llvm.experimental_deoptimize (resp. 
@llvm.experimental.guard) into // invokes. The caller's "segment" of the deoptimization continuation // attached to the newly inlined @llvm.experimental_deoptimize // (resp. @llvm.experimental.guard) call should contain the exception // handling logic, if any. if (auto *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize || F->getIntrinsicID() == Intrinsic::experimental_guard) continue; if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { // This call is nested inside a funclet. If that funclet has an unwind // destination within the inlinee, then unwinding out of this call would // be UB. Rewriting this call to an invoke which targets the inlined // invoke's unwind dest would give the call's parent funclet multiple // unwind destinations, which is something that subsequent EH table // generation can't handle and that the veirifer rejects. So when we // see such a call, leave it as a call. auto *FuncletPad = cast(FuncletBundle->Inputs[0]); Value *UnwindDestToken = getUnwindDestToken(FuncletPad, *FuncletUnwindMap); if (UnwindDestToken && !isa(UnwindDestToken)) continue; #ifndef NDEBUG Instruction *MemoKey; if (auto *CatchPad = dyn_cast(FuncletPad)) MemoKey = CatchPad->getCatchSwitch(); else MemoKey = FuncletPad; assert(FuncletUnwindMap->count(MemoKey) && (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && "must get memoized to avoid confusing later searches"); #endif // NDEBUG } changeToInvokeAndSplitBasicBlock(CI, UnwindEdge); return BB; } return nullptr; } /// If we inlined an invoke site, we need to convert calls /// in the body of the inlined function into invokes. /// /// II is the invoke instruction being inlined. FirstNewBlock is the first /// block of the inlined code (the last block is the end of the function), /// and InlineCodeInfo is information about the code that got inlined. static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, ClonedCodeInfo &InlinedCodeInfo) { BasicBlock *InvokeDest = II->getUnwindDest(); Function *Caller = FirstNewBlock->getParent(); // The inlined code is currently at the end of the function, scan from the // start of the inlined code to its end, checking for stuff we need to // rewrite. LandingPadInliningInfo Invoke(II); // Get all of the inlined landing pad instructions. SmallPtrSet InlinedLPads; for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); I != E; ++I) if (InvokeInst *II = dyn_cast(I->getTerminator())) InlinedLPads.insert(II->getLandingPadInst()); // Append the clauses from the outer landing pad instruction into the inlined // landing pad instructions. LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); for (LandingPadInst *InlinedLPad : InlinedLPads) { unsigned OuterNum = OuterLPad->getNumClauses(); InlinedLPad->reserveClauses(OuterNum); for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); if (OuterLPad->isCleanup()) InlinedLPad->setCleanup(true); } for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); BB != E; ++BB) { if (InlinedCodeInfo.ContainsCalls) if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( &*BB, Invoke.getOuterResumeDest())) // Update any PHI nodes in the exceptional block to indicate that there // is now a new entry in them. Invoke.addIncomingPHIValuesFor(NewBB); // Forward any resumes that are remaining here. 
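// Illustrative before/after (schematic names) for the two rewrites done in
// this loop. A may-throw call in the inlined body becomes an invoke that
// unwinds to the caller's landing pad:
//
//   before:  %r = call i32 @may_throw()
//   after:   %r = invoke i32 @may_throw()
//                    to label %split unwind label %lpad.outer
//
// and a leftover 'resume' is forwarded to the caller's landing pad body,
// with the resumed { ptr, i32 } value routed into the PHI created by
// getInnerResumeDest():
//
//   before:  resume { ptr, i32 } %lp.val
//   after:   br label %lpad.outer.body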
if (ResumeInst *RI = dyn_cast(BB->getTerminator())) Invoke.forwardResume(RI, InlinedLPads); } // Now that everything is happy, we have one final detail. The PHI nodes in // the exception destination block still have entries due to the original // invoke instruction. Eliminate these entries (which might even delete the // PHI node) now. InvokeDest->removePredecessor(II->getParent()); } /// If we inlined an invoke site, we need to convert calls /// in the body of the inlined function into invokes. /// /// II is the invoke instruction being inlined. FirstNewBlock is the first /// block of the inlined code (the last block is the end of the function), /// and InlineCodeInfo is information about the code that got inlined. static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, ClonedCodeInfo &InlinedCodeInfo) { BasicBlock *UnwindDest = II->getUnwindDest(); Function *Caller = FirstNewBlock->getParent(); assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); // If there are PHI nodes in the unwind destination block, we need to keep // track of which values came into them from the invoke before removing the // edge from this block. SmallVector UnwindDestPHIValues; BasicBlock *InvokeBB = II->getParent(); for (PHINode &PHI : UnwindDest->phis()) { // Save the value to use for this edge. UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB)); } // Add incoming-PHI values to the unwind destination block for the given basic // block, using the values for the original invoke's source block. auto UpdatePHINodes = [&](BasicBlock *Src) { BasicBlock::iterator I = UnwindDest->begin(); for (Value *V : UnwindDestPHIValues) { PHINode *PHI = cast(I); PHI->addIncoming(V, Src); ++I; } }; // This connects all the instructions which 'unwind to caller' to the invoke // destination. UnwindDestMemoTy FuncletUnwindMap; for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); BB != E; ++BB) { if (auto *CRI = dyn_cast(BB->getTerminator())) { if (CRI->unwindsToCaller()) { auto *CleanupPad = CRI->getCleanupPad(); CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI->getIterator()); CRI->eraseFromParent(); UpdatePHINodes(&*BB); // Finding a cleanupret with an unwind destination would confuse // subsequent calls to getUnwindDestToken, so map the cleanuppad // to short-circuit any such calls and recognize this as an "unwind // to caller" cleanup. assert(!FuncletUnwindMap.count(CleanupPad) || isa(FuncletUnwindMap[CleanupPad])); FuncletUnwindMap[CleanupPad] = ConstantTokenNone::get(Caller->getContext()); } } Instruction *I = BB->getFirstNonPHI(); if (!I->isEHPad()) continue; Instruction *Replacement = nullptr; if (auto *CatchSwitch = dyn_cast(I)) { if (CatchSwitch->unwindsToCaller()) { Value *UnwindDestToken; if (auto *ParentPad = dyn_cast(CatchSwitch->getParentPad())) { // This catchswitch is nested inside another funclet. If that // funclet has an unwind destination within the inlinee, then // unwinding out of this catchswitch would be UB. Rewriting this // catchswitch to unwind to the inlined invoke's unwind dest would // give the parent funclet multiple unwind destinations, which is // something that subsequent EH table generation can't handle and // that the veirifer rejects. So when we see such a call, leave it // as "unwind to caller". 
UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap); if (UnwindDestToken && !isa(UnwindDestToken)) continue; } else { // This catchswitch has no parent to inherit constraints from, and // none of its descendants can have an unwind edge that exits it and // targets another funclet in the inlinee. It may or may not have a // descendant that definitively has an unwind to caller. In either // case, we'll have to assume that any unwinds out of it may need to // be routed to the caller, so treat it as though it has a definitive // unwind to caller. UnwindDestToken = ConstantTokenNone::get(Caller->getContext()); } auto *NewCatchSwitch = CatchSwitchInst::Create( CatchSwitch->getParentPad(), UnwindDest, CatchSwitch->getNumHandlers(), CatchSwitch->getName(), CatchSwitch->getIterator()); for (BasicBlock *PadBB : CatchSwitch->handlers()) NewCatchSwitch->addHandler(PadBB); // Propagate info for the old catchswitch over to the new one in // the unwind map. This also serves to short-circuit any subsequent // checks for the unwind dest of this catchswitch, which would get // confused if they found the outer handler in the callee. FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken; Replacement = NewCatchSwitch; } } else if (!isa(I)) { llvm_unreachable("unexpected EHPad!"); } if (Replacement) { Replacement->takeName(I); I->replaceAllUsesWith(Replacement); I->eraseFromParent(); UpdatePHINodes(&*BB); } } if (InlinedCodeInfo.ContainsCalls) for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); BB != E; ++BB) if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( &*BB, UnwindDest, &FuncletUnwindMap)) // Update any PHI nodes in the exceptional block to indicate that there // is now a new entry in them. UpdatePHINodes(NewBB); // Now that everything is happy, we have one final detail. The PHI nodes in // the exception destination block still have entries due to the original // invoke instruction. Eliminate these entries (which might even delete the // PHI node) now. UnwindDest->removePredecessor(InvokeBB); } static bool haveCommonPrefix(MDNode *MIBStackContext, MDNode *CallsiteStackContext) { assert(MIBStackContext->getNumOperands() > 0 && CallsiteStackContext->getNumOperands() > 0); // Because of the context trimming performed during matching, the callsite // context could have more stack ids than the MIB. We match up to the end of // the shortest stack context. for (auto MIBStackIter = MIBStackContext->op_begin(), CallsiteStackIter = CallsiteStackContext->op_begin(); MIBStackIter != MIBStackContext->op_end() && CallsiteStackIter != CallsiteStackContext->op_end(); MIBStackIter++, CallsiteStackIter++) { auto *Val1 = mdconst::dyn_extract(*MIBStackIter); auto *Val2 = mdconst::dyn_extract(*CallsiteStackIter); assert(Val1 && Val2); if (Val1->getZExtValue() != Val2->getZExtValue()) return false; } return true; } static void removeMemProfMetadata(CallBase *Call) { Call->setMetadata(LLVMContext::MD_memprof, nullptr); } static void removeCallsiteMetadata(CallBase *Call) { Call->setMetadata(LLVMContext::MD_callsite, nullptr); } static void updateMemprofMetadata(CallBase *CI, const std::vector &MIBList) { assert(!MIBList.empty()); // Remove existing memprof, which will either be replaced or may not be needed // if we are able to use a single allocation type function attribute. 
removeMemProfMetadata(CI); CallStackTrie CallStack; for (Metadata *MIB : MIBList) CallStack.addCallStack(cast(MIB)); bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI); assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof)); if (!MemprofMDAttached) // If we used a function attribute remove the callsite metadata as well. removeCallsiteMetadata(CI); } // Update the metadata on the inlined copy ClonedCall of a call OrigCall in the // inlined callee body, based on the callsite metadata InlinedCallsiteMD from // the call that was inlined. static void propagateMemProfHelper(const CallBase *OrigCall, CallBase *ClonedCall, MDNode *InlinedCallsiteMD) { MDNode *OrigCallsiteMD = ClonedCall->getMetadata(LLVMContext::MD_callsite); MDNode *ClonedCallsiteMD = nullptr; // Check if the call originally had callsite metadata, and update it for the // new call in the inlined body. if (OrigCallsiteMD) { // The cloned call's context is now the concatenation of the original call's // callsite metadata and the callsite metadata on the call where it was // inlined. ClonedCallsiteMD = MDNode::concatenate(OrigCallsiteMD, InlinedCallsiteMD); ClonedCall->setMetadata(LLVMContext::MD_callsite, ClonedCallsiteMD); } // Update any memprof metadata on the cloned call. MDNode *OrigMemProfMD = ClonedCall->getMetadata(LLVMContext::MD_memprof); if (!OrigMemProfMD) return; // We currently expect that allocations with memprof metadata also have // callsite metadata for the allocation's part of the context. assert(OrigCallsiteMD); // New call's MIB list. std::vector NewMIBList; // For each MIB metadata, check if its call stack context starts with the // new clone's callsite metadata. If so, that MIB goes onto the cloned call in // the inlined body. If not, it stays on the out-of-line original call. for (auto &MIBOp : OrigMemProfMD->operands()) { MDNode *MIB = dyn_cast(MIBOp); // Stack is first operand of MIB. MDNode *StackMD = getMIBStackNode(MIB); assert(StackMD); // See if the new cloned callsite context matches this profiled context. if (haveCommonPrefix(StackMD, ClonedCallsiteMD)) // Add it to the cloned call's MIB list. NewMIBList.push_back(MIB); } if (NewMIBList.empty()) { removeMemProfMetadata(ClonedCall); removeCallsiteMetadata(ClonedCall); return; } if (NewMIBList.size() < OrigMemProfMD->getNumOperands()) updateMemprofMetadata(ClonedCall, NewMIBList); } // Update memprof related metadata (!memprof and !callsite) based on the // inlining of Callee into the callsite at CB. The updates include merging the // inlined callee's callsite metadata with that of the inlined call, // and moving the subset of any memprof contexts to the inlined callee // allocations if they match the new inlined call stack. static void propagateMemProfMetadata(Function *Callee, CallBase &CB, bool ContainsMemProfMetadata, const ValueMap &VMap) { MDNode *CallsiteMD = CB.getMetadata(LLVMContext::MD_callsite); // Only need to update if the inlined callsite had callsite metadata, or if // there was any memprof metadata inlined. if (!CallsiteMD && !ContainsMemProfMetadata) return; // Propagate metadata onto the cloned calls in the inlined callee. for (const auto &Entry : VMap) { // See if this is a call that has been inlined and remapped, and not // simplified away in the process. 
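// Schematic shape of the metadata being merged here (ids made up):
//
//   ; allocation in the callee, profiled under two contexts
//   %p = call ptr @malloc(i64 4), !memprof !10, !callsite !13
//   !10 = !{!11, !12}
//   !11 = !{!14, !"notcold"}        ; MIB: call stack + allocation type
//   !12 = !{!15, !"cold"}
//   !14 = !{i64 111, i64 222}       ; leaf-first stack ids
//   !15 = !{i64 111, i64 333}
//   !13 = !{i64 111}                ; the allocation's own callsite context
//
// If the call being inlined carries !callsite !{i64 222}, the cloned
// allocation's context becomes {111, 222}; only MIBs whose stack shares that
// prefix (here !11) move onto the clone, the rest stay on the out-of-line
// original.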
auto *OrigCall = dyn_cast_or_null(Entry.first); auto *ClonedCall = dyn_cast_or_null(Entry.second); if (!OrigCall || !ClonedCall) continue; // If the inlined callsite did not have any callsite metadata, then it isn't // involved in any profiled call contexts, and we can remove any memprof // metadata on the cloned call. if (!CallsiteMD) { removeMemProfMetadata(ClonedCall); removeCallsiteMetadata(ClonedCall); continue; } propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD); } } /// When inlining a call site that has !llvm.mem.parallel_loop_access, /// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should /// be propagated to all memory-accessing cloned instructions. static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart, Function::iterator FEnd) { MDNode *MemParallelLoopAccess = CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access); MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group); MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope); MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias); if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias) return; for (BasicBlock &BB : make_range(FStart, FEnd)) { for (Instruction &I : BB) { // This metadata is only relevant for instructions that access memory. if (!I.mayReadOrWriteMemory()) continue; if (MemParallelLoopAccess) { // TODO: This probably should not overwrite MemParalleLoopAccess. MemParallelLoopAccess = MDNode::concatenate( I.getMetadata(LLVMContext::MD_mem_parallel_loop_access), MemParallelLoopAccess); I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, MemParallelLoopAccess); } if (AccessGroup) I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups( I.getMetadata(LLVMContext::MD_access_group), AccessGroup)); if (AliasScope) I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate( I.getMetadata(LLVMContext::MD_alias_scope), AliasScope)); if (NoAlias) I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( I.getMetadata(LLVMContext::MD_noalias), NoAlias)); } } } /// Bundle operands of the inlined function must be added to inlined call sites. static void PropagateOperandBundles(Function::iterator InlinedBB, Instruction *CallSiteEHPad) { for (Instruction &II : llvm::make_early_inc_range(*InlinedBB)) { CallBase *I = dyn_cast(&II); if (!I) continue; // Skip call sites which already have a "funclet" bundle. if (I->getOperandBundle(LLVMContext::OB_funclet)) continue; // Skip call sites which are nounwind intrinsics (as long as they don't // lower into regular function calls in the course of IR transformations). auto *CalledFn = dyn_cast(I->getCalledOperand()->stripPointerCasts()); if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() && !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID())) continue; SmallVector OpBundles; I->getOperandBundlesAsDefs(OpBundles); OpBundles.emplace_back("funclet", CallSiteEHPad); Instruction *NewInst = CallBase::Create(I, OpBundles, I->getIterator()); NewInst->takeName(I); I->replaceAllUsesWith(NewInst); I->eraseFromParent(); } } namespace { /// Utility for cloning !noalias and !alias.scope metadata. When a code region /// using scoped alias metadata is inlined, the aliasing relationships may not /// hold between the two version. It is necessary to create a deep clone of the /// metadata, putting the two versions in separate scope domains. 
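// Example (schematic): scoped-alias tags only carry a guarantee relative to
// other accesses from the *same* inlined instance. If the callee's body uses
//
//   store i32 0, ptr %p, !alias.scope !1
//   %v = load i32, ptr %q, !noalias !1
//   !0 = distinct !{!0, !"callee domain"}
//   !1 = !{!2}
//   !2 = distinct !{!2, !0, !"callee: %p"}
//
// and the callee is inlined at two call sites, reusing !2 for both copies
// would wrongly extend the no-alias guarantee across the copies; the cloner
// below gives each inlining fresh domain and scope nodes of the same shape.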
class ScopedAliasMetadataDeepCloner { using MetadataMap = DenseMap; SetVector MD; MetadataMap MDMap; void addRecursiveMetadataUses(); public: ScopedAliasMetadataDeepCloner(const Function *F); /// Create a new clone of the scoped alias metadata, which will be used by /// subsequent remap() calls. void clone(); /// Remap instructions in the given range from the original to the cloned /// metadata. void remap(Function::iterator FStart, Function::iterator FEnd); }; } // namespace ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner( const Function *F) { for (const BasicBlock &BB : *F) { for (const Instruction &I : BB) { if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) MD.insert(M); if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) MD.insert(M); // We also need to clone the metadata in noalias intrinsics. if (const auto *Decl = dyn_cast(&I)) MD.insert(Decl->getScopeList()); } } addRecursiveMetadataUses(); } void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() { SmallVector Queue(MD.begin(), MD.end()); while (!Queue.empty()) { const MDNode *M = cast(Queue.pop_back_val()); for (const Metadata *Op : M->operands()) if (const MDNode *OpMD = dyn_cast(Op)) if (MD.insert(OpMD)) Queue.push_back(OpMD); } } void ScopedAliasMetadataDeepCloner::clone() { assert(MDMap.empty() && "clone() already called ?"); SmallVector DummyNodes; for (const MDNode *I : MD) { DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), std::nullopt)); MDMap[I].reset(DummyNodes.back().get()); } // Create new metadata nodes to replace the dummy nodes, replacing old // metadata references with either a dummy node or an already-created new // node. SmallVector NewOps; for (const MDNode *I : MD) { for (const Metadata *Op : I->operands()) { if (const MDNode *M = dyn_cast(Op)) NewOps.push_back(MDMap[M]); else NewOps.push_back(const_cast(Op)); } MDNode *NewM = MDNode::get(I->getContext(), NewOps); MDTuple *TempM = cast(MDMap[I]); assert(TempM->isTemporary() && "Expected temporary node"); TempM->replaceAllUsesWith(NewM); NewOps.clear(); } } void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart, Function::iterator FEnd) { if (MDMap.empty()) return; // Nothing to do. for (BasicBlock &BB : make_range(FStart, FEnd)) { for (Instruction &I : BB) { // TODO: The null checks for the MDMap.lookup() results should no longer // be necessary. if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) if (MDNode *MNew = MDMap.lookup(M)) I.setMetadata(LLVMContext::MD_alias_scope, MNew); if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) if (MDNode *MNew = MDMap.lookup(M)) I.setMetadata(LLVMContext::MD_noalias, MNew); if (auto *Decl = dyn_cast(&I)) if (MDNode *MNew = MDMap.lookup(Decl->getScopeList())) Decl->setScopeList(MNew); } } } /// If the inlined function has noalias arguments, /// then add new alias scopes for each noalias argument, tag the mapped noalias /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. 
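// Schematic result for a callee 'void @f(ptr noalias %p, ptr %q)' that stores
// through %p and loads through %q: after inlining, the cloned instructions
// end up roughly as
//
//   call void @llvm.experimental.noalias.scope.decl(metadata !2)
//   store i32 0, ptr %p.i, !alias.scope !2
//   %v = load i32, ptr %q.i, !noalias !2
//
//   !1 = distinct !{!1, !"f"}            ; domain created for this inlining
//   !2 = !{!3}
//   !3 = distinct !{!3, !1, !"f: %p"}    ; scope for the noalias argument %p
//
// i.e. accesses based on %p join the new scope and accesses not derived from
// it are tagged noalias against that scope.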
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, const DataLayout &DL, AAResults *CalleeAAR, ClonedCodeInfo &InlinedFunctionInfo) { if (!EnableNoAliasConversion) return; const Function *CalledFunc = CB.getCalledFunction(); SmallVector NoAliasArgs; for (const Argument &Arg : CalledFunc->args()) if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty()) NoAliasArgs.push_back(&Arg); if (NoAliasArgs.empty()) return; // To do a good job, if a noalias variable is captured, we need to know if // the capture point dominates the particular use we're considering. DominatorTree DT; DT.recalculate(const_cast(*CalledFunc)); // noalias indicates that pointer values based on the argument do not alias // pointer values which are not based on it. So we add a new "scope" for each // noalias function argument. Accesses using pointers based on that argument // become part of that alias scope, accesses using pointers not based on that // argument are tagged as noalias with that scope. DenseMap NewScopes; MDBuilder MDB(CalledFunc->getContext()); // Create a new scope domain for this function. MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) { const Argument *A = NoAliasArgs[i]; std::string Name = std::string(CalledFunc->getName()); if (A->hasName()) { Name += ": %"; Name += A->getName(); } else { Name += ": argument "; Name += utostr(i); } // Note: We always create a new anonymous root here. This is true regardless // of the linkage of the callee because the aliasing "scope" is not just a // property of the callee, but also all control dependencies in the caller. MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); NewScopes.insert(std::make_pair(A, NewScope)); if (UseNoAliasIntrinsic) { // Introduce a llvm.experimental.noalias.scope.decl for the noalias // argument. MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope); auto *NoAliasDecl = IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList); // Ignore the result for now. The result will be used when the // llvm.noalias intrinsic is introduced. (void)NoAliasDecl; } } // Iterate over all new instructions in the map; for all memory-access // instructions, add the alias scope metadata. for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); VMI != VMIE; ++VMI) { if (const Instruction *I = dyn_cast(VMI->first)) { if (!VMI->second) continue; Instruction *NI = dyn_cast(VMI->second); if (!NI || InlinedFunctionInfo.isSimplified(I, NI)) continue; bool IsArgMemOnlyCall = false, IsFuncCall = false; SmallVector PtrArgs; if (const LoadInst *LI = dyn_cast(I)) PtrArgs.push_back(LI->getPointerOperand()); else if (const StoreInst *SI = dyn_cast(I)) PtrArgs.push_back(SI->getPointerOperand()); else if (const VAArgInst *VAAI = dyn_cast(I)) PtrArgs.push_back(VAAI->getPointerOperand()); else if (const AtomicCmpXchgInst *CXI = dyn_cast(I)) PtrArgs.push_back(CXI->getPointerOperand()); else if (const AtomicRMWInst *RMWI = dyn_cast(I)) PtrArgs.push_back(RMWI->getPointerOperand()); else if (const auto *Call = dyn_cast(I)) { // If we know that the call does not access memory, then we'll still // know that about the inlined clone of this call site, and we don't // need to add metadata. if (Call->doesNotAccessMemory()) continue; IsFuncCall = true; if (CalleeAAR) { MemoryEffects ME = CalleeAAR->getMemoryEffects(Call); // We'll retain this knowledge without additional metadata. 
if (ME.onlyAccessesInaccessibleMem()) continue; if (ME.onlyAccessesArgPointees()) IsArgMemOnlyCall = true; } for (Value *Arg : Call->args()) { // Only care about pointer arguments. If a noalias argument is // accessed through a non-pointer argument, it must be captured // first (e.g. via ptrtoint), and we protect against captures below. if (!Arg->getType()->isPointerTy()) continue; PtrArgs.push_back(Arg); } } // If we found no pointers, then this instruction is not suitable for // pairing with an instruction to receive aliasing metadata. // However, if this is a call, this we might just alias with none of the // noalias arguments. if (PtrArgs.empty() && !IsFuncCall) continue; // It is possible that there is only one underlying object, but you // need to go through several PHIs to see it, and thus could be // repeated in the Objects list. SmallPtrSet ObjSet; SmallVector Scopes, NoAliases; for (const Value *V : PtrArgs) { SmallVector Objects; getUnderlyingObjects(V, Objects, /* LI = */ nullptr); for (const Value *O : Objects) ObjSet.insert(O); } // Figure out if we're derived from anything that is not a noalias // argument. bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false, UsesUnknownObject = false; for (const Value *V : ObjSet) { // Is this value a constant that cannot be derived from any pointer // value (we need to exclude constant expressions, for example, that // are formed from arithmetic on global symbols). bool IsNonPtrConst = isa(V) || isa(V) || isa(V) || isa(V) || isa(V); if (IsNonPtrConst) continue; // If this is anything other than a noalias argument, then we cannot // completely describe the aliasing properties using alias.scope // metadata (and, thus, won't add any). if (const Argument *A = dyn_cast(V)) { if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias)) UsesAliasingPtr = true; } else { UsesAliasingPtr = true; } if (isEscapeSource(V)) { // An escape source can only alias with a noalias argument if it has // been captured beforehand. RequiresNoCaptureBefore = true; } else if (!isa(V) && !isIdentifiedObject(V)) { // If this is neither an escape source, nor some identified object // (which cannot directly alias a noalias argument), nor some other // argument (which, by definition, also cannot alias a noalias // argument), conservatively do not make any assumptions. UsesUnknownObject = true; } } // Nothing we can do if the used underlying object cannot be reliably // determined. if (UsesUnknownObject) continue; // A function call can always get captured noalias pointers (via other // parameters, globals, etc.). if (IsFuncCall && !IsArgMemOnlyCall) RequiresNoCaptureBefore = true; // First, we want to figure out all of the sets with which we definitely // don't alias. Iterate over all noalias set, and add those for which: // 1. The noalias argument is not in the set of objects from which we // definitely derive. // 2. The noalias argument has not yet been captured. // An arbitrary function that might load pointers could see captured // noalias arguments via other noalias arguments or globals, and so we // must always check for prior capture. for (const Argument *A : NoAliasArgs) { if (ObjSet.contains(A)) continue; // May be based on a noalias argument. // It might be tempting to skip the PointerMayBeCapturedBefore check if // A->hasNoCaptureAttr() is true, but this is incorrect because // nocapture only guarantees that no copies outlive the function, not // that the value cannot be locally captured. 
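// For instance (schematic), a nocapture pointer can still be captured
// locally for the duration of the call:
//
//   store ptr %arg, ptr %slot        ; %arg is nocapture noalias
//   %alias = load ptr, ptr %slot     ; escape source really based on %arg
//   store i32 1, ptr %alias          ; may alias accesses through %arg
//
// which is exactly the situation the PointerMayBeCapturedBefore query below
// has to rule out.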
if (!RequiresNoCaptureBefore || !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false, /* StoreCaptures */ false, I, &DT)) NoAliases.push_back(NewScopes[A]); } if (!NoAliases.empty()) NI->setMetadata(LLVMContext::MD_noalias, MDNode::concatenate( NI->getMetadata(LLVMContext::MD_noalias), MDNode::get(CalledFunc->getContext(), NoAliases))); // Next, we want to figure out all of the sets to which we might belong. // We might belong to a set if the noalias argument is in the set of // underlying objects. If there is some non-noalias argument in our list // of underlying objects, then we cannot add a scope because the fact // that some access does not alias with any set of our noalias arguments // cannot itself guarantee that it does not alias with this access // (because there is some pointer of unknown origin involved and the // other access might also depend on this pointer). We also cannot add // scopes to arbitrary functions unless we know they don't access any // non-parameter pointer-values. bool CanAddScopes = !UsesAliasingPtr; if (CanAddScopes && IsFuncCall) CanAddScopes = IsArgMemOnlyCall; if (CanAddScopes) for (const Argument *A : NoAliasArgs) { if (ObjSet.count(A)) Scopes.push_back(NewScopes[A]); } if (!Scopes.empty()) NI->setMetadata( LLVMContext::MD_alias_scope, MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope), MDNode::get(CalledFunc->getContext(), Scopes))); } } } static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin, ReturnInst *End) { assert(Begin->getParent() == End->getParent() && "Expected to be in same basic block!"); auto BeginIt = Begin->getIterator(); assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator"); return !llvm::isGuaranteedToTransferExecutionToSuccessor( ++BeginIt, End->getIterator(), InlinerAttributeWindow + 1); } // Add attributes from CB params and Fn attributes that can always be propagated // to the corresponding argument / inner callbases. static void AddParamAndFnBasicAttributes(const CallBase &CB, ValueToValueMapTy &VMap, ClonedCodeInfo &InlinedFunctionInfo) { auto *CalledFunction = CB.getCalledFunction(); auto &Context = CalledFunction->getContext(); // Collect valid attributes for all params. SmallVector ValidParamAttrs; bool HasAttrToPropagate = false; for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) { ValidParamAttrs.emplace_back(AttrBuilder{CB.getContext()}); // Access attributes can be propagated to any param with the same underlying // object as the argument. if (CB.paramHasAttr(I, Attribute::ReadNone)) ValidParamAttrs.back().addAttribute(Attribute::ReadNone); if (CB.paramHasAttr(I, Attribute::ReadOnly)) ValidParamAttrs.back().addAttribute(Attribute::ReadOnly); HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes(); } // Won't be able to propagate anything. if (!HasAttrToPropagate) return; for (BasicBlock &BB : *CalledFunction) { for (Instruction &Ins : BB) { const auto *InnerCB = dyn_cast(&Ins); if (!InnerCB) continue; auto *NewInnerCB = dyn_cast_or_null(VMap.lookup(InnerCB)); if (!NewInnerCB) continue; // The InnerCB might have be simplified during the inlining // process which can make propagation incorrect. if (InlinedFunctionInfo.isSimplified(InnerCB, NewInnerCB)) continue; AttributeList AL = NewInnerCB->getAttributes(); for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) { // Check if the underlying value for the parameter is an argument. 
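// Example (schematic) of why byval operands must be excluded just below:
//
//   caller:  call void @callee(ptr readonly %p)
//   callee:  define void @callee(ptr %p) {
//              call void @inner(ptr byval(i32) %p)
//              ...
//            }
//
// @inner receives its own by-value copy and may freely store to it, so
// tagging that operand readonly/readnone based on the outer call site would
// be unsound even though @callee never writes through %p itself.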
const Value *UnderlyingV = getUnderlyingObject(InnerCB->getArgOperand(I)); const Argument *Arg = dyn_cast(UnderlyingV); if (!Arg) continue; - if (AL.hasParamAttr(I, Attribute::ByVal)) + if (NewInnerCB->paramHasAttr(I, Attribute::ByVal)) // It's unsound to propagate memory attributes to byval arguments. // Even if CalledFunction doesn't e.g. write to the argument, // the call to NewInnerCB may write to its by-value copy. continue; unsigned ArgNo = Arg->getArgNo(); // If so, propagate its access attributes. AL = AL.addParamAttributes(Context, I, ValidParamAttrs[ArgNo]); // We can have conflicting attributes from the inner callsite and // to-be-inlined callsite. In that case, choose the most // restrictive. // readonly + writeonly means we can never deref so make readnone. if (AL.hasParamAttr(I, Attribute::ReadOnly) && AL.hasParamAttr(I, Attribute::WriteOnly)) AL = AL.addParamAttribute(Context, I, Attribute::ReadNone); // If have readnone, need to clear readonly/writeonly if (AL.hasParamAttr(I, Attribute::ReadNone)) { AL = AL.removeParamAttribute(Context, I, Attribute::ReadOnly); AL = AL.removeParamAttribute(Context, I, Attribute::WriteOnly); } // Writable cannot exist in conjunction w/ readonly/readnone if (AL.hasParamAttr(I, Attribute::ReadOnly) || AL.hasParamAttr(I, Attribute::ReadNone)) AL = AL.removeParamAttribute(Context, I, Attribute::Writable); } NewInnerCB->setAttributes(AL); } } } // Only allow these white listed attributes to be propagated back to the // callee. This is because other attributes may only be valid on the call // itself, i.e. attributes such as signext and zeroext. // Attributes that are always okay to propagate as if they are violated its // immediate UB. static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) { AttrBuilder Valid(CB.getContext()); if (auto DerefBytes = CB.getRetDereferenceableBytes()) Valid.addDereferenceableAttr(DerefBytes); if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes()) Valid.addDereferenceableOrNullAttr(DerefOrNullBytes); if (CB.hasRetAttr(Attribute::NoAlias)) Valid.addAttribute(Attribute::NoAlias); if (CB.hasRetAttr(Attribute::NoUndef)) Valid.addAttribute(Attribute::NoUndef); return Valid; } // Attributes that need additional checks as propagating them may change // behavior or cause new UB. static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) { AttrBuilder Valid(CB.getContext()); if (CB.hasRetAttr(Attribute::NonNull)) Valid.addAttribute(Attribute::NonNull); if (CB.hasRetAttr(Attribute::Alignment)) Valid.addAlignmentAttr(CB.getRetAlign()); if (std::optional Range = CB.getRange()) Valid.addRangeAttr(*Range); return Valid; } static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap, ClonedCodeInfo &InlinedFunctionInfo) { AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB); AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB); if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes()) return; auto *CalledFunction = CB.getCalledFunction(); auto &Context = CalledFunction->getContext(); for (auto &BB : *CalledFunction) { auto *RI = dyn_cast(BB.getTerminator()); if (!RI || !isa(RI->getOperand(0))) continue; auto *RetVal = cast(RI->getOperand(0)); // Check that the cloned RetVal exists and is a call, otherwise we cannot // add the attributes on the cloned RetVal. Simplification during inlining // could have transformed the cloned instruction. 
auto *NewRetVal = dyn_cast_or_null(VMap.lookup(RetVal)); if (!NewRetVal) continue; // The RetVal might have be simplified during the inlining // process which can make propagation incorrect. if (InlinedFunctionInfo.isSimplified(RetVal, NewRetVal)) continue; // Backward propagation of attributes to the returned value may be incorrect // if it is control flow dependent. // Consider: // @callee { // %rv = call @foo() // %rv2 = call @bar() // if (%rv2 != null) // return %rv2 // if (%rv == null) // exit() // return %rv // } // caller() { // %val = call nonnull @callee() // } // Here we cannot add the nonnull attribute on either foo or bar. So, we // limit the check to both RetVal and RI are in the same basic block and // there are no throwing/exiting instructions between these instructions. if (RI->getParent() != RetVal->getParent() || MayContainThrowingOrExitingCallAfterCB(RetVal, RI)) continue; // Add to the existing attributes of NewRetVal, i.e. the cloned call // instruction. // NB! When we have the same attribute already existing on NewRetVal, but // with a differing value, the AttributeList's merge API honours the already // existing attribute value (i.e. attributes such as dereferenceable, // dereferenceable_or_null etc). See AttrBuilder::merge for more details. AttributeList AL = NewRetVal->getAttributes(); if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes()) ValidUB.removeAttribute(Attribute::Dereferenceable); if (ValidUB.getDereferenceableOrNullBytes() < AL.getRetDereferenceableOrNullBytes()) ValidUB.removeAttribute(Attribute::DereferenceableOrNull); AttributeList NewAL = AL.addRetAttributes(Context, ValidUB); // Attributes that may generate poison returns are a bit tricky. If we // propagate them, other uses of the callsite might have their behavior // change or cause UB (if they have noundef) b.c of the new potential // poison. // Take the following three cases: // // 1) // define nonnull ptr @foo() { // %p = call ptr @bar() // call void @use(ptr %p) willreturn nounwind // ret ptr %p // } // // 2) // define noundef nonnull ptr @foo() { // %p = call ptr @bar() // call void @use(ptr %p) willreturn nounwind // ret ptr %p // } // // 3) // define nonnull ptr @foo() { // %p = call noundef ptr @bar() // ret ptr %p // } // // In case 1, we can't propagate nonnull because poison value in @use may // change behavior or trigger UB. // In case 2, we don't need to be concerned about propagating nonnull, as // any new poison at @use will trigger UB anyways. // In case 3, we can never propagate nonnull because it may create UB due to // the noundef on @bar. if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne()) ValidPG.removeAttribute(Attribute::Alignment); if (ValidPG.hasAttributes()) { Attribute CBRange = ValidPG.getAttribute(Attribute::Range); if (CBRange.isValid()) { Attribute NewRange = AL.getRetAttr(Attribute::Range); if (NewRange.isValid()) { ValidPG.addRangeAttr( CBRange.getRange().intersectWith(NewRange.getRange())); } } // Three checks. // If the callsite has `noundef`, then a poison due to violating the // return attribute will create UB anyways so we can always propagate. // Otherwise, if the return value (callee to be inlined) has `noundef`, we // can't propagate as a new poison return will cause UB. // Finally, check if the return value has no uses whose behavior may // change/may cause UB if we potentially return poison. At the moment this // is implemented overly conservatively with a single-use check. 
// TODO: Update the single-use check to iterate through uses and only bail // if we have a potentially dangerous use. if (CB.hasRetAttr(Attribute::NoUndef) || (RetVal->hasOneUse() && !RetVal->hasRetAttr(Attribute::NoUndef))) NewAL = NewAL.addRetAttributes(Context, ValidPG); } NewRetVal->setAttributes(NewAL); } } /// If the inlined function has non-byval align arguments, then /// add @llvm.assume-based alignment assumptions to preserve this information. static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache) return; AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller()); auto &DL = CB.getDataLayout(); // To avoid inserting redundant assumptions, we should check for assumptions // already in the caller. To do this, we might need a DT of the caller. DominatorTree DT; bool DTCalculated = false; Function *CalledFunc = CB.getCalledFunction(); for (Argument &Arg : CalledFunc->args()) { if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() || Arg.hasNUses(0)) continue; MaybeAlign Alignment = Arg.getParamAlign(); if (!Alignment) continue; if (!DTCalculated) { DT.recalculate(*CB.getCaller()); DTCalculated = true; } // If we can already prove the asserted alignment in the context of the // caller, then don't bother inserting the assumption. Value *ArgVal = CB.getArgOperand(Arg.getArgNo()); if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= *Alignment) continue; CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption( DL, ArgVal, Alignment->value()); AC->registerAssumption(cast(NewAsmp)); } } static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src, Module *M, BasicBlock *InsertBlock, InlineFunctionInfo &IFI, Function *CalledFunc) { IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType)); // Always generate a memcpy of alignment 1 here because we don't know // the alignment of the src pointer. Other optimizations can infer // better alignment. CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src, /*SrcAlign*/ Align(1), Size); // The verifier requires that all calls of debug-info-bearing functions // from debug-info-bearing functions have a debug location (for inlining // purposes). Assign a dummy location to satisfy the constraint. if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram()) if (DISubprogram *SP = CalledFunc->getSubprogram()) CI->setDebugLoc(DILocation::get(SP->getContext(), 0, 0, SP)); } /// When inlining a call site that has a byval argument, /// we have to make the implicit memcpy explicit by adding it. static Value *HandleByValArgument(Type *ByValType, Value *Arg, Instruction *TheCall, const Function *CalledFunc, InlineFunctionInfo &IFI, MaybeAlign ByValAlignment) { Function *Caller = TheCall->getFunction(); const DataLayout &DL = Caller->getDataLayout(); // If the called function is readonly, then it could not mutate the caller's // copy of the byval'd memory. In this case, it is safe to elide the copy and // temporary. if (CalledFunc->onlyReadsMemory()) { // If the byval argument has a specified alignment that is greater than the // passed in pointer, then we either have to round up the input pointer or // give up on this transformation. if (ByValAlignment.valueOrOne() == 1) return Arg; AssumptionCache *AC = IFI.GetAssumptionCache ? 
&IFI.GetAssumptionCache(*Caller) : nullptr; // If the pointer is already known to be sufficiently aligned, or if we can // round it up to a larger alignment, then we don't need a temporary. if (getOrEnforceKnownAlignment(Arg, *ByValAlignment, DL, TheCall, AC) >= *ByValAlignment) return Arg; // Otherwise, we have to make a memcpy to get a safe alignment. This is bad // for code quality, but rarely happens and is required for correctness. } // Create the alloca. If we have DataLayout, use nice alignment. Align Alignment = DL.getPrefTypeAlign(ByValType); // If the byval had an alignment specified, we *must* use at least that // alignment, as it is required by the byval argument (and uses of the // pointer inside the callee). if (ByValAlignment) Alignment = std::max(Alignment, *ByValAlignment); AllocaInst *NewAlloca = new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(), nullptr, Alignment, Arg->getName()); NewAlloca->insertBefore(Caller->begin()->begin()); IFI.StaticAllocas.push_back(NewAlloca); // Uses of the argument in the function should use our new alloca // instead. return NewAlloca; } // Check whether this Value is used by a lifetime intrinsic. static bool isUsedByLifetimeMarker(Value *V) { for (User *U : V->users()) if (IntrinsicInst *II = dyn_cast(U)) if (II->isLifetimeStartOrEnd()) return true; return false; } // Check whether the given alloca already has // lifetime.start or lifetime.end intrinsics. static bool hasLifetimeMarkers(AllocaInst *AI) { Type *Ty = AI->getType(); Type *Int8PtrTy = PointerType::get(Ty->getContext(), Ty->getPointerAddressSpace()); if (Ty == Int8PtrTy) return isUsedByLifetimeMarker(AI); // Do a scan to find all the casts to i8*. for (User *U : AI->users()) { if (U->getType() != Int8PtrTy) continue; if (U->stripPointerCasts() != AI) continue; if (isUsedByLifetimeMarker(U)) return true; } return false; } /// Return the result of AI->isStaticAlloca() if AI were moved to the entry /// block. Allocas used in inalloca calls and allocas of dynamic array size /// cannot be static. static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) { return isa(AI->getArraySize()) && !AI->isUsedWithInAlloca(); } /// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL /// inlined at \p InlinedAt. \p IANodes is an inlined-at cache. static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt, LLVMContext &Ctx, DenseMap &IANodes) { auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes); return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(), OrigDL.getScope(), IA); } /// Update inlined instructions' line numbers to /// to encode location where these instructions are inlined. static void fixupLineNumbers(Function *Fn, Function::iterator FI, Instruction *TheCall, bool CalleeHasDebugInfo) { const DebugLoc &TheCallDL = TheCall->getDebugLoc(); if (!TheCallDL) return; auto &Ctx = Fn->getContext(); DILocation *InlinedAtNode = TheCallDL; // Create a unique call site, not to be confused with any other call from the // same location. InlinedAtNode = DILocation::getDistinct( Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(), InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt()); // Cache the inlined-at nodes as they're built so they are reused, without // this every instruction's inlined-at chain would become distinct from each // other. DenseMap IANodes; // Check if we are not generating inline line tables and want to use // the call site location instead. 
bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables"); // Helper-util for updating the metadata attached to an instruction. auto UpdateInst = [&](Instruction &I) { // Loop metadata needs to be updated so that the start and end locs // reference inlined-at locations. auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode, &IANodes](Metadata *MD) -> Metadata * { if (auto *Loc = dyn_cast_or_null(MD)) return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get(); return MD; }; updateLoopMetadataDebugLocations(I, updateLoopInfoLoc); if (!NoInlineLineTables) if (DebugLoc DL = I.getDebugLoc()) { DebugLoc IDL = inlineDebugLoc(DL, InlinedAtNode, I.getContext(), IANodes); I.setDebugLoc(IDL); return; } if (CalleeHasDebugInfo && !NoInlineLineTables) return; // If the inlined instruction has no line number, or if inline info // is not being generated, make it look as if it originates from the call // location. This is important for ((__always_inline, __nodebug__)) // functions which must use caller location for all instructions in their // function body. // Don't update static allocas, as they may get moved later. if (auto *AI = dyn_cast(&I)) if (allocaWouldBeStaticInEntry(AI)) return; // Do not force a debug loc for pseudo probes, since they do not need to // be debuggable, and also they are expected to have a zero/null dwarf // discriminator at this point which could be violated otherwise. if (isa(I)) return; I.setDebugLoc(TheCallDL); }; // Helper-util for updating debug-info records attached to instructions. auto UpdateDVR = [&](DbgRecord *DVR) { assert(DVR->getDebugLoc() && "Debug Value must have debug loc"); if (NoInlineLineTables) { DVR->setDebugLoc(TheCallDL); return; } DebugLoc DL = DVR->getDebugLoc(); DebugLoc IDL = inlineDebugLoc(DL, InlinedAtNode, DVR->getMarker()->getParent()->getContext(), IANodes); DVR->setDebugLoc(IDL); }; // Iterate over all instructions, updating metadata and debug-info records. for (; FI != Fn->end(); ++FI) { for (Instruction &I : *FI) { UpdateInst(I); for (DbgRecord &DVR : I.getDbgRecordRange()) { UpdateDVR(&DVR); } } // Remove debug info intrinsics if we're not keeping inline info. if (NoInlineLineTables) { BasicBlock::iterator BI = FI->begin(); while (BI != FI->end()) { if (isa(BI)) { BI = BI->eraseFromParent(); continue; } else { BI->dropDbgRecords(); } ++BI; } } } } #undef DEBUG_TYPE #define DEBUG_TYPE "assignment-tracking" /// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB. static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL, const CallBase &CB) { at::StorageToVarsMap EscapedLocals; SmallPtrSet SeenBases; LLVM_DEBUG( errs() << "# Finding caller local variables escaped by callee\n"); for (const Value *Arg : CB.args()) { LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n"); if (!Arg->getType()->isPointerTy()) { LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n"); continue; } const Instruction *I = dyn_cast(Arg); if (!I) { LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n"); continue; } // Walk back to the base storage. assert(Arg->getType()->isPtrOrPtrVectorTy()); APInt TmpOffset(DL.getIndexTypeSizeInBits(Arg->getType()), 0, false); const AllocaInst *Base = dyn_cast( Arg->stripAndAccumulateConstantOffsets(DL, TmpOffset, true)); if (!Base) { LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n"); continue; } assert(Base); LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n"); // We only need to process each base address once - skip any duplicates. 
if (!SeenBases.insert(Base).second) continue; // Find all local variables associated with the backing storage. auto CollectAssignsForStorage = [&](auto *DbgAssign) { // Skip variables from inlined functions - they are not local variables. if (DbgAssign->getDebugLoc().getInlinedAt()) return; LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n"); EscapedLocals[Base].insert(at::VarRecord(DbgAssign)); }; for_each(at::getAssignmentMarkers(Base), CollectAssignsForStorage); for_each(at::getDVRAssignmentMarkers(Base), CollectAssignsForStorage); } return EscapedLocals; } static void trackInlinedStores(Function::iterator Start, Function::iterator End, const CallBase &CB) { LLVM_DEBUG(errs() << "trackInlinedStores into " << Start->getParent()->getName() << " from " << CB.getCalledFunction()->getName() << "\n"); std::unique_ptr DL = std::make_unique(CB.getModule()); at::trackAssignments(Start, End, collectEscapedLocals(*DL, CB), *DL); } /// Update inlined instructions' DIAssignID metadata. We need to do this /// otherwise a function inlined more than once into the same function /// will cause DIAssignID to be shared by many instructions. static void fixupAssignments(Function::iterator Start, Function::iterator End) { DenseMap Map; // Loop over all the inlined instructions. If we find a DIAssignID // attachment or use, replace it with a new version. for (auto BBI = Start; BBI != End; ++BBI) { for (Instruction &I : *BBI) at::remapAssignID(Map, I); } } #undef DEBUG_TYPE #define DEBUG_TYPE "inline-function" /// Update the block frequencies of the caller after a callee has been inlined. /// /// Each block cloned into the caller has its block frequency scaled by the /// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of /// callee's entry block gets the same frequency as the callsite block and the /// relative frequencies of all cloned blocks remain the same after cloning. static void updateCallerBFI(BasicBlock *CallSiteBlock, const ValueToValueMapTy &VMap, BlockFrequencyInfo *CallerBFI, BlockFrequencyInfo *CalleeBFI, const BasicBlock &CalleeEntryBlock) { SmallPtrSet ClonedBBs; for (auto Entry : VMap) { if (!isa(Entry.first) || !Entry.second) continue; auto *OrigBB = cast(Entry.first); auto *ClonedBB = cast(Entry.second); BlockFrequency Freq = CalleeBFI->getBlockFreq(OrigBB); if (!ClonedBBs.insert(ClonedBB).second) { // Multiple blocks in the callee might get mapped to one cloned block in // the caller since we prune the callee as we clone it. When that happens, // we want to use the maximum among the original blocks' frequencies. BlockFrequency NewFreq = CallerBFI->getBlockFreq(ClonedBB); if (NewFreq > Freq) Freq = NewFreq; } CallerBFI->setBlockFreq(ClonedBB, Freq); } BasicBlock *EntryClone = cast(VMap.lookup(&CalleeEntryBlock)); CallerBFI->setBlockFreqAndScale( EntryClone, CallerBFI->getBlockFreq(CallSiteBlock), ClonedBBs); } /// Update the branch metadata for cloned call instructions. static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, const ProfileCount &CalleeEntryCount, const CallBase &TheCall, ProfileSummaryInfo *PSI, BlockFrequencyInfo *CallerBFI) { if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1) return; auto CallSiteCount = PSI ? 
PSI->getProfileCount(TheCall, CallerBFI) : std::nullopt; int64_t CallCount = std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount()); updateProfileCallee(Callee, -CallCount, &VMap); } void llvm::updateProfileCallee( Function *Callee, int64_t EntryDelta, const ValueMap *VMap) { auto CalleeCount = Callee->getEntryCount(); if (!CalleeCount) return; const uint64_t PriorEntryCount = CalleeCount->getCount(); // Since CallSiteCount is an estimate, it could exceed the original callee // count and has to be set to 0 so guard against underflow. const uint64_t NewEntryCount = (EntryDelta < 0 && static_cast(-EntryDelta) > PriorEntryCount) ? 0 : PriorEntryCount + EntryDelta; auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount, const uint64_t PriorEntryCount) { Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB); if (VPtr) scaleProfData(*VPtr, NewEntryCount, PriorEntryCount); }; // During inlining ? if (VMap) { uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; for (auto Entry : *VMap) { if (isa(Entry.first)) if (auto *CI = dyn_cast_or_null(Entry.second)) { CI->updateProfWeight(CloneEntryCount, PriorEntryCount); updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount); } if (isa(Entry.first)) if (auto *II = dyn_cast_or_null(Entry.second)) { II->updateProfWeight(CloneEntryCount, PriorEntryCount); updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount); } } } if (EntryDelta) { Callee->setEntryCount(NewEntryCount); for (BasicBlock &BB : *Callee) // No need to update the callsite if it is pruned during inlining. if (!VMap || VMap->count(&BB)) for (Instruction &I : BB) { if (CallInst *CI = dyn_cast(&I)) { CI->updateProfWeight(NewEntryCount, PriorEntryCount); updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount); } if (InvokeInst *II = dyn_cast(&I)) { II->updateProfWeight(NewEntryCount, PriorEntryCount); updateVTableProfWeight(II, NewEntryCount, PriorEntryCount); } } } } /// An operand bundle "clang.arc.attachedcall" on a call indicates the call /// result is implicitly consumed by a call to retainRV or claimRV immediately /// after the call. This function inlines the retainRV/claimRV calls. /// /// There are three cases to consider: /// /// 1. If there is a call to autoreleaseRV that takes a pointer to the returned /// object in the callee return block, the autoreleaseRV call and the /// retainRV/claimRV call in the caller cancel out. If the call in the caller /// is a claimRV call, a call to objc_release is emitted. /// /// 2. If there is a call in the callee return block that doesn't have operand /// bundle "clang.arc.attachedcall", the operand bundle on the original call /// is transferred to the call in the callee. /// /// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is /// a retainRV call. static void inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, const SmallVectorImpl &Returns) { Module *Mod = CB.getModule(); assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function"); bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV, IsUnsafeClaimRV = !IsRetainRV; for (auto *RI : Returns) { Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0)); bool InsertRetainCall = IsRetainRV; IRBuilder<> Builder(RI->getContext()); // Walk backwards through the basic block looking for either a matching // autoreleaseRV call or an unannotated call. 
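// Editorial illustration (not from the original source), showing case 1 of
// the comment above in IR form: the callee's autoreleaseRV cancels the
// retainRV attached to the inlined call site. Function names other than the
// ObjC ARC intrinsics are hypothetical.
//
//   define ptr @callee() {
//     %obj = call ptr @make_object()
//     call ptr @llvm.objc.autoreleaseReturnValue(ptr %obj)  ; result unused
//     ret ptr %obj
//   }
//
//   define ptr @caller() {
//     %r = call ptr @callee() [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ]
//     ret ptr %r
//   }
//
// After inlining, the autoreleaseRV call is erased and no retain is emitted,
// so @caller simply returns %obj. Had the attached call been
// @llvm.objc.unsafeClaimAutoreleasedReturnValue, a call to
// @llvm.objc.release(ptr %obj) would be emitted in its place.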
auto InstRange = llvm::make_range(++(RI->getIterator().getReverse()), RI->getParent()->rend()); for (Instruction &I : llvm::make_early_inc_range(InstRange)) { // Ignore casts. if (isa(I)) continue; if (auto *II = dyn_cast(&I)) { if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue || !II->hasNUses(0) || objcarc::GetRCIdentityRoot(II->getOperand(0)) != RetOpnd) break; // If we've found a matching authoreleaseRV call: // - If claimRV is attached to the call, insert a call to objc_release // and erase the autoreleaseRV call. // - If retainRV is attached to the call, just erase the autoreleaseRV // call. if (IsUnsafeClaimRV) { Builder.SetInsertPoint(II); Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_release); Builder.CreateCall(IFn, RetOpnd, ""); } II->eraseFromParent(); InsertRetainCall = false; break; } auto *CI = dyn_cast(&I); if (!CI) break; if (objcarc::GetRCIdentityRoot(CI) != RetOpnd || objcarc::hasAttachedCallOpBundle(CI)) break; // If we've found an unannotated call that defines RetOpnd, add a // "clang.arc.attachedcall" operand bundle. Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(&CB)}; OperandBundleDef OB("clang.arc.attachedcall", BundleArgs); auto *NewCall = CallBase::addOperandBundle( CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI->getIterator()); NewCall->copyMetadata(*CI); CI->replaceAllUsesWith(NewCall); CI->eraseFromParent(); InsertRetainCall = false; break; } if (InsertRetainCall) { // The retainRV is attached to the call and we've failed to find a // matching autoreleaseRV or an annotated call in the callee. Emit a call // to objc_retain. Builder.SetInsertPoint(RI); Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain); Builder.CreateCall(IFn, RetOpnd, ""); } } } /// This function inlines the called function into the basic block of the /// caller. This returns false if it is not possible to inline this call. /// The program is still in a well defined state if this occurs though. /// /// Note that this only does one level of inlining. For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, bool MergeAttributes, AAResults *CalleeAAR, bool InsertLifetime, Function *ForwardVarArgsTo) { assert(CB.getParent() && CB.getFunction() && "Instruction not in function!"); // FIXME: we don't inline callbr yet. if (isa(CB)) return InlineResult::failure("We don't inline callbr yet."); // If IFI has any state in it, zap it before we fill it in. IFI.reset(); Function *CalledFunc = CB.getCalledFunction(); if (!CalledFunc || // Can't inline external function or indirect CalledFunc->isDeclaration()) // call! return InlineResult::failure("external or indirect"); // The inliner does not know how to inline through calls with operand bundles // in general ... Value *ConvergenceControlToken = nullptr; if (CB.hasOperandBundles()) { for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) { auto OBUse = CB.getOperandBundleAt(i); uint32_t Tag = OBUse.getTagID(); // ... but it knows how to inline through "deopt" operand bundles ... if (Tag == LLVMContext::OB_deopt) continue; // ... and "funclet" operand bundles. 
if (Tag == LLVMContext::OB_funclet) continue; if (Tag == LLVMContext::OB_clang_arc_attachedcall) continue; if (Tag == LLVMContext::OB_kcfi) continue; if (Tag == LLVMContext::OB_convergencectrl) { ConvergenceControlToken = OBUse.Inputs[0].get(); continue; } return InlineResult::failure("unsupported operand bundle"); } } // FIXME: The check below is redundant and incomplete. According to spec, if a // convergent call is missing a token, then the caller is using uncontrolled // convergence. If the callee has an entry intrinsic, then the callee is using // controlled convergence, and the call cannot be inlined. A proper // implemenation of this check requires a whole new analysis that identifies // convergence in every function. For now, we skip that and just do this one // cursory check. The underlying assumption is that in a compiler flow that // fully implements convergence control tokens, there is no mixing of // controlled and uncontrolled convergent operations in the whole program. if (CB.isConvergent()) { auto *I = CalledFunc->getEntryBlock().getFirstNonPHI(); if (auto *IntrinsicCall = dyn_cast(I)) { if (IntrinsicCall->getIntrinsicID() == Intrinsic::experimental_convergence_entry) { if (!ConvergenceControlToken) { return InlineResult::failure( "convergent call needs convergencectrl operand"); } } } } // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CB.doesNotThrow(); BasicBlock *OrigBB = CB.getParent(); Function *Caller = OrigBB->getParent(); // GC poses two hazards to inlining, which only occur when the callee has GC: // 1. If the caller has no GC, then the callee's GC must be propagated to the // caller. // 2. If the caller has a differing GC, it is invalid to inline. if (CalledFunc->hasGC()) { if (!Caller->hasGC()) Caller->setGC(CalledFunc->getGC()); else if (CalledFunc->getGC() != Caller->getGC()) return InlineResult::failure("incompatible GC"); } // Get the personality function from the callee if it contains a landing pad. Constant *CalledPersonality = CalledFunc->hasPersonalityFn() ? CalledFunc->getPersonalityFn()->stripPointerCasts() : nullptr; // Find the personality function used by the landing pads of the caller. If it // exists, then check to see that it matches the personality function used in // the callee. Constant *CallerPersonality = Caller->hasPersonalityFn() ? Caller->getPersonalityFn()->stripPointerCasts() : nullptr; if (CalledPersonality) { if (!CallerPersonality) Caller->setPersonalityFn(CalledPersonality); // If the personality functions match, then we can perform the // inlining. Otherwise, we can't inline. // TODO: This isn't 100% true. Some personality functions are proper // supersets of others and can be used in place of the other. else if (CalledPersonality != CallerPersonality) return InlineResult::failure("incompatible personality"); } // We need to figure out which funclet the callsite was in so that we may // properly nest the callee. Instruction *CallSiteEHPad = nullptr; if (CallerPersonality) { EHPersonality Personality = classifyEHPersonality(CallerPersonality); if (isScopedEHPersonality(Personality)) { std::optional ParentFunclet = CB.getOperandBundle(LLVMContext::OB_funclet); if (ParentFunclet) CallSiteEHPad = cast(ParentFunclet->Inputs.front()); // OK, the inlining site is legal. What about the target function? if (CallSiteEHPad) { if (Personality == EHPersonality::MSVC_CXX) { // The MSVC personality cannot tolerate catches getting inlined into // cleanup funclets. 
if (isa(CallSiteEHPad)) { // Ok, the call site is within a cleanuppad. Let's check the callee // for catchpads. for (const BasicBlock &CalledBB : *CalledFunc) { if (isa(CalledBB.getFirstNonPHI())) return InlineResult::failure("catch in cleanup funclet"); } } } else if (isAsynchronousEHPersonality(Personality)) { // SEH is even less tolerant, there may not be any sort of exceptional // funclet in the callee. for (const BasicBlock &CalledBB : *CalledFunc) { if (CalledBB.isEHPad()) return InlineResult::failure("SEH in cleanup funclet"); } } } } } // Determine if we are dealing with a call in an EHPad which does not unwind // to caller. bool EHPadForCallUnwindsLocally = false; if (CallSiteEHPad && isa(CB)) { UnwindDestMemoTy FuncletUnwindMap; Value *CallSiteUnwindDestToken = getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap); EHPadForCallUnwindsLocally = CallSiteUnwindDestToken && !isa(CallSiteUnwindDestToken); } // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. Function::iterator LastBlock = --Caller->end(); // Make sure to capture all of the return instructions from the cloned // function. SmallVector Returns; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; { // Scope to destroy VMap after cloning. ValueToValueMapTy VMap; struct ByValInit { Value *Dst; Value *Src; Type *Ty; }; // Keep a list of pair (dst, src) to emit byval initializations. SmallVector ByValInits; // When inlining a function that contains noalias scope metadata, // this metadata needs to be cloned so that the inlined blocks // have different "unique scopes" at every call site. // Track the metadata that must be cloned. Do this before other changes to // the function, so that we do not get in trouble when inlining caller == // callee. ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction()); auto &DL = Caller->getDataLayout(); // Calculate the vector of arguments to pass into the function cloner, which // matches up the formal to the actual argument values. auto AI = CB.arg_begin(); unsigned ArgNo = 0; for (Function::arg_iterator I = CalledFunc->arg_begin(), E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) { Value *ActualArg = *AI; // When byval arguments actually inlined, we need to make the copy implied // by them explicit. However, we don't do this if the callee is readonly // or readnone, because the copy would be unneeded: the callee doesn't // modify the struct. if (CB.isByValArgument(ArgNo)) { ActualArg = HandleByValArgument(CB.getParamByValType(ArgNo), ActualArg, &CB, CalledFunc, IFI, CalledFunc->getParamAlign(ArgNo)); if (ActualArg != *AI) ByValInits.push_back( {ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)}); } VMap[&*I] = ActualArg; } // TODO: Remove this when users have been updated to the assume bundles. // Add alignment assumptions if necessary. We do this before the inlined // instructions are actually cloned into the caller so that we can easily // check what will be known at the start of the inlined code. AddAlignmentAssumptions(CB, IFI); AssumptionCache *AC = IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; /// Preserve all attributes on of the call and its parameters. salvageKnowledge(&CB, AC); // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. 
CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, /*ModuleLevelChanges=*/false, Returns, ".i", &InlinedFunctionInfo); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; // Insert retainRV/clainRV runtime calls. objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(&CB); if (RVCallKind != objcarc::ARCInstKind::None) inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns); // Updated caller/callee profiles only when requested. For sample loader // inlining, the context-sensitive inlinee profile doesn't need to be // subtracted from callee profile, and the inlined clone also doesn't need // to be scaled based on call site count. if (IFI.UpdateProfile) { if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr) // Update the BFI of blocks cloned into the caller. updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI, CalledFunc->front()); if (auto Profile = CalledFunc->getEntryCount()) updateCallProfile(CalledFunc, VMap, *Profile, CB, IFI.PSI, IFI.CallerBFI); } // Inject byval arguments initialization. for (ByValInit &Init : ByValInits) HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(), &*FirstNewBlock, IFI, CalledFunc); std::optional ParentDeopt = CB.getOperandBundle(LLVMContext::OB_deopt); if (ParentDeopt) { SmallVector OpDefs; for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { CallBase *ICS = dyn_cast_or_null(VH); if (!ICS) continue; // instruction was DCE'd or RAUW'ed to undef OpDefs.clear(); OpDefs.reserve(ICS->getNumOperandBundles()); for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe; ++COBi) { auto ChildOB = ICS->getOperandBundleAt(COBi); if (ChildOB.getTagID() != LLVMContext::OB_deopt) { // If the inlined call has other operand bundles, let them be OpDefs.emplace_back(ChildOB); continue; } // It may be useful to separate this logic (of handling operand // bundles) out to a separate "policy" component if this gets crowded. // Prepend the parent's deoptimization continuation to the newly // inlined call's deoptimization continuation. std::vector MergedDeoptArgs; MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + ChildOB.Inputs.size()); llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs); llvm::append_range(MergedDeoptArgs, ChildOB.Inputs); OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); } Instruction *NewI = CallBase::Create(ICS, OpDefs, ICS->getIterator()); // Note: the RAUW does the appropriate fixup in VMap, so we need to do // this even if the call returns void. ICS->replaceAllUsesWith(NewI); VH = nullptr; ICS->eraseFromParent(); } } // For 'nodebug' functions, the associated DISubprogram is always null. // Conservatively avoid propagating the callsite debug location to // instructions inlined from a function whose DISubprogram is not null. fixupLineNumbers(Caller, FirstNewBlock, &CB, CalledFunc->getSubprogram() != nullptr); if (isAssignmentTrackingEnabled(*Caller->getParent())) { // Interpret inlined stores to caller-local variables as assignments. trackInlinedStores(FirstNewBlock, Caller->end(), CB); // Update DIAssignID metadata attachments and uses so that they are // unique to this inlined instance. fixupAssignments(FirstNewBlock, Caller->end()); } // Now clone the inlined noalias scope metadata. SAMetadataCloner.clone(); SAMetadataCloner.remap(FirstNewBlock, Caller->end()); // Add noalias metadata if necessary. 
AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo); // Clone return attributes on the callsite into the calls within the inlined // function which feed into its return value. AddReturnAttributes(CB, VMap, InlinedFunctionInfo); // Clone attributes on the params of the callsite to calls within the // inlined function which use the same param. AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo); propagateMemProfMetadata(CalledFunc, CB, InlinedFunctionInfo.ContainsMemProfMetadata, VMap); // Propagate metadata on the callsite if necessary. PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end()); // Register any cloned assumptions. if (IFI.GetAssumptionCache) for (BasicBlock &NewBlock : make_range(FirstNewBlock->getIterator(), Caller->end())) for (Instruction &I : NewBlock) if (auto *II = dyn_cast(&I)) IFI.GetAssumptionCache(*Caller).registerAssumption(II); } if (ConvergenceControlToken) { auto *I = FirstNewBlock->getFirstNonPHI(); if (auto *IntrinsicCall = dyn_cast(I)) { if (IntrinsicCall->getIntrinsicID() == Intrinsic::experimental_convergence_entry) { IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken); IntrinsicCall->eraseFromParent(); } } } // If there are any alloca instructions in the block that used to be the entry // block for the callee, move them to the entry block of the caller. First // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. { BasicBlock::iterator InsertPoint = Caller->begin()->begin(); for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast(I++); if (!AI) continue; // If the alloca is now dead, remove it. This often occurs due to code // specialization. if (AI->use_empty()) { AI->eraseFromParent(); continue; } if (!allocaWouldBeStaticInEntry(AI)) continue; // Keep track of the static allocas that we inline into the caller. IFI.StaticAllocas.push_back(AI); // Scan for the block of allocas that we can move over, and move them // all at once. while (isa(I) && !cast(I)->use_empty() && allocaWouldBeStaticInEntry(cast(I))) { IFI.StaticAllocas.push_back(cast(I)); ++I; } // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. I.setTailBit(true); Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock, AI->getIterator(), I); } } SmallVector VarArgsToForward; SmallVector VarArgsAttrs; for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); i < CB.arg_size(); i++) { VarArgsToForward.push_back(CB.getArgOperand(i)); VarArgsAttrs.push_back(CB.getAttributes().getParamAttrs(i)); } bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false; if (InlinedFunctionInfo.ContainsCalls) { CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None; if (CallInst *CI = dyn_cast(&CB)) CallSiteTailKind = CI->getTailCallKind(); // For inlining purposes, the "notail" marker is the same as no marker. if (CallSiteTailKind == CallInst::TCK_NoTail) CallSiteTailKind = CallInst::TCK_None; for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB) { for (Instruction &I : llvm::make_early_inc_range(*BB)) { CallInst *CI = dyn_cast(&I); if (!CI) continue; // Forward varargs from inlined call site to calls to the // ForwardVarArgsTo function, if requested, and to musttail calls. 
if (!VarArgsToForward.empty() && ((ForwardVarArgsTo && CI->getCalledFunction() == ForwardVarArgsTo) || CI->isMustTailCall())) { // Collect attributes for non-vararg parameters. AttributeList Attrs = CI->getAttributes(); SmallVector ArgAttrs; if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { for (unsigned ArgNo = 0; ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) ArgAttrs.push_back(Attrs.getParamAttrs(ArgNo)); } // Add VarArg attributes. ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end()); Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttrs(), Attrs.getRetAttrs(), ArgAttrs); // Add VarArgs to existing parameters. SmallVector Params(CI->args()); Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); CallInst *NewCI = CallInst::Create( CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI->getIterator()); NewCI->setDebugLoc(CI->getDebugLoc()); NewCI->setAttributes(Attrs); NewCI->setCallingConv(CI->getCallingConv()); CI->replaceAllUsesWith(NewCI); CI->eraseFromParent(); CI = NewCI; } if (Function *F = CI->getCalledFunction()) InlinedDeoptimizeCalls |= F->getIntrinsicID() == Intrinsic::experimental_deoptimize; // We need to reduce the strength of any inlined tail calls. For // musttail, we have to avoid introducing potential unbounded stack // growth. For example, if functions 'f' and 'g' are mutually recursive // with musttail, we can inline 'g' into 'f' so long as we preserve // musttail on the cloned call to 'f'. If either the inlined call site // or the cloned call site is *not* musttail, the program already has // one frame of stack growth, so it's safe to remove musttail. Here is // a table of example transformations: // // f -> musttail g -> musttail f ==> f -> musttail f // f -> musttail g -> tail f ==> f -> tail f // f -> g -> musttail f ==> f -> f // f -> g -> tail f ==> f -> f // // Inlined notail calls should remain notail calls. CallInst::TailCallKind ChildTCK = CI->getTailCallKind(); if (ChildTCK != CallInst::TCK_NoTail) ChildTCK = std::min(CallSiteTailKind, ChildTCK); CI->setTailCallKind(ChildTCK); InlinedMustTailCalls |= CI->isMustTailCall(); // Call sites inlined through a 'nounwind' call site should be // 'nounwind' as well. However, avoid marking call sites explicitly // where possible. This helps expose more opportunities for CSE after // inlining, commonly when the callee is an intrinsic. if (MarkNoUnwind && !CI->doesNotThrow()) CI->setDoesNotThrow(); } } } // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. // We need to insert lifetime intrinsics even at O0 to avoid invalid // access caused by multithreaded coroutines. The check // `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only. if ((InsertLifetime || Caller->isPresplitCoroutine()) && !IFI.StaticAllocas.empty()) { IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin()); for (AllocaInst *AI : IFI.StaticAllocas) { // Don't mark swifterror allocas. They can't have bitcast uses. if (AI->isSwiftError()) continue; // If the alloca is already scoped to something smaller than the whole // function then there's no need to add redundant, less accurate markers. if (hasLifetimeMarkers(AI)) continue; // Try to determine the size of the allocation. 
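// Editorial worked example (not in the original source): for an inlined
//   %buf = alloca [4 x i32], i32 8
// AllocaArraySize below is 8 and DL.getTypeAllocSize([4 x i32]) is 16 bytes,
// so the markers become
//   call void @llvm.lifetime.start.p0(i64 128, ptr %buf)
//   call void @llvm.lifetime.end.p0(i64 128, ptr %buf)
// If the element size is scalable or the multiplication would overflow,
// AllocaSize stays null and the unsized (i64 -1) form is emitted instead.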
ConstantInt *AllocaSize = nullptr; if (ConstantInt *AIArraySize = dyn_cast(AI->getArraySize())) { auto &DL = Caller->getDataLayout(); Type *AllocaType = AI->getAllocatedType(); TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType); uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); // Don't add markers for zero-sized allocas. if (AllocaArraySize == 0) continue; // Check that array size doesn't saturate uint64_t and doesn't // overflow when it's multiplied by type size. if (!AllocaTypeSize.isScalable() && AllocaArraySize != std::numeric_limits::max() && std::numeric_limits::max() / AllocaArraySize >= AllocaTypeSize.getFixedValue()) { AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), AllocaArraySize * AllocaTypeSize); } } builder.CreateLifetimeStart(AI, AllocaSize); for (ReturnInst *RI : Returns) { // Don't insert llvm.lifetime.end calls between a musttail or deoptimize // call and a return. The return kills all local allocas. if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) continue; if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) continue; IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize); } } } // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. if (InlinedFunctionInfo.ContainsDynamicAllocas) { // Insert the llvm.stacksave. CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) .CreateStackSave("savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the // inlined function. for (ReturnInst *RI : Returns) { // Don't insert llvm.stackrestore calls between a musttail or deoptimize // call and a return. The return will restore the stack pointer. if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall()) continue; if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall()) continue; IRBuilder<>(RI).CreateStackRestore(SavedPtr); } } // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. This is sensitive to which // funclet pads were top-level in the inlinee, so must be done before // rewriting the "parent pad" links. if (auto *II = dyn_cast(&CB)) { BasicBlock *UnwindDest = II->getUnwindDest(); Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); if (isa(FirstNonPHI)) { HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); } else { HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); } } // Update the lexical scopes of the new funclets and callsites. // Anything that had 'none' as its parent is now nested inside the callsite's // EHPad. if (CallSiteEHPad) { for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); BB != E; ++BB) { // Add bundle operands to inlined call sites. PropagateOperandBundles(BB, CallSiteEHPad); // It is problematic if the inlinee has a cleanupret which unwinds to // caller and we inline it into a call site which doesn't unwind but into // an EH pad that does. Such an edge must be dynamically unreachable. // As such, we replace the cleanupret with unreachable. 
if (auto *CleanupRet = dyn_cast(BB->getTerminator())) if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally) changeToUnreachable(CleanupRet); Instruction *I = BB->getFirstNonPHI(); if (!I->isEHPad()) continue; if (auto *CatchSwitch = dyn_cast(I)) { if (isa(CatchSwitch->getParentPad())) CatchSwitch->setParentPad(CallSiteEHPad); } else { auto *FPI = cast(I); if (isa(FPI->getParentPad())) FPI->setParentPad(CallSiteEHPad); } } } if (InlinedDeoptimizeCalls) { // We need to at least remove the deoptimizing returns from the Return set, // so that the control flow from those returns does not get merged into the // caller (but terminate it instead). If the caller's return type does not // match the callee's return type, we also need to change the return type of // the intrinsic. if (Caller->getReturnType() == CB.getType()) { llvm::erase_if(Returns, [](ReturnInst *RI) { return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr; }); } else { SmallVector NormalReturns; Function *NewDeoptIntrinsic = Intrinsic::getDeclaration( Caller->getParent(), Intrinsic::experimental_deoptimize, {Caller->getReturnType()}); for (ReturnInst *RI : Returns) { CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall(); if (!DeoptCall) { NormalReturns.push_back(RI); continue; } // The calling convention on the deoptimize call itself may be bogus, // since the code we're inlining may have undefined behavior (and may // never actually execute at runtime); but all // @llvm.experimental.deoptimize declarations have to have the same // calling convention in a well-formed module. auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv(); NewDeoptIntrinsic->setCallingConv(CallingConv); auto *CurBB = RI->getParent(); RI->eraseFromParent(); SmallVector CallArgs(DeoptCall->args()); SmallVector OpBundles; DeoptCall->getOperandBundlesAsDefs(OpBundles); auto DeoptAttributes = DeoptCall->getAttributes(); DeoptCall->eraseFromParent(); assert(!OpBundles.empty() && "Expected at least the deopt operand bundle"); IRBuilder<> Builder(CurBB); CallInst *NewDeoptCall = Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles); NewDeoptCall->setCallingConv(CallingConv); NewDeoptCall->setAttributes(DeoptAttributes); if (NewDeoptCall->getType()->isVoidTy()) Builder.CreateRetVoid(); else Builder.CreateRet(NewDeoptCall); // Since the ret type is changed, remove the incompatible attributes. NewDeoptCall->removeRetAttrs( AttributeFuncs::typeIncompatible(NewDeoptCall->getType())); } // Leave behind the normal returns so we can merge control flow. std::swap(Returns, NormalReturns); } } // Handle any inlined musttail call sites. In order for a new call site to be // musttail, the source of the clone and the inlined call site must have been // musttail. Therefore it's safe to return without merging control into the // phi below. if (InlinedMustTailCalls) { // Check if we need to bitcast the result of any musttail calls. Type *NewRetTy = Caller->getReturnType(); bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy; // Handle the returns preceded by musttail calls separately. SmallVector NormalReturns; for (ReturnInst *RI : Returns) { CallInst *ReturnedMustTail = RI->getParent()->getTerminatingMustTailCall(); if (!ReturnedMustTail) { NormalReturns.push_back(RI); continue; } if (!NeedBitCast) continue; // Delete the old return and any preceding bitcast. 
BasicBlock *CurBB = RI->getParent(); auto *OldCast = dyn_cast_or_null(RI->getReturnValue()); RI->eraseFromParent(); if (OldCast) OldCast->eraseFromParent(); // Insert a new bitcast and return with the right type. IRBuilder<> Builder(CurBB); Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy)); } // Leave behind the normal returns so we can merge control flow. std::swap(Returns, NormalReturns); } // Now that all of the transforms on the inlined code have taken place but // before we splice the inlined code into the CFG and lose track of which // blocks were actually inlined, collect the call sites. We only do this if // call graph updates weren't requested, as those provide value handle based // tracking of inlined call sites instead. Calls to intrinsics are not // collected because they are not inlineable. if (InlinedFunctionInfo.ContainsCalls) { // Otherwise just collect the raw call sites that were inlined. for (BasicBlock &NewBB : make_range(FirstNewBlock->getIterator(), Caller->end())) for (Instruction &I : NewBB) if (auto *CB = dyn_cast(&I)) if (!(CB->getCalledFunction() && CB->getCalledFunction()->isIntrinsic())) IFI.InlinedCallSites.push_back(CB); } // If we cloned in _exactly one_ basic block, and if that block ends in a // return instruction, we splice the body of the inlined callee directly into // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. OrigBB->splice(CB.getIterator(), &*FirstNewBlock, FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->back().eraseFromParent(); // If the call site was an invoke instruction, add a branch to the normal // destination. if (InvokeInst *II = dyn_cast(&CB)) { BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), CB.getIterator()); NewBr->setDebugLoc(Returns[0]->getDebugLoc()); } // If the return instruction returned a value, replace uses of the call with // uses of the returned value. if (!CB.use_empty()) { ReturnInst *R = Returns[0]; if (&CB == R->getReturnValue()) CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); else CB.replaceAllUsesWith(R->getReturnValue()); } // Since we are now done with the Call/Invoke, we can delete it. CB.eraseFromParent(); // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); if (MergeAttributes) AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc); // We are now done with the inlining. return InlineResult::success(); } // Otherwise, we have the normal case, of more than one block to inline or // multiple return sites. // We want to clone the entire callee function into the hole between the // "starter" and "ender" blocks. How we accomplish this depends on whether // this is an invoke instruction or a call instruction. BasicBlock *AfterCallBB; BranchInst *CreatedBranchToNormalDest = nullptr; if (InvokeInst *II = dyn_cast(&CB)) { // Add an unconditional branch to make this look like the CallInst case... CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), CB.getIterator()); // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. 
AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), CalledFunc->getName() + ".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(), CalledFunc->getName() + ".exit"); } if (IFI.CallerBFI) { // Copy original BB's block frequency to AfterCallBB IFI.CallerBFI->setBlockFreq(AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB)); } // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // Instruction *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); Br->setOperand(0, &*FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. Caller->splice(AfterCallBB->getIterator(), Caller, FirstNewBlock, Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. Type *RTy = CalledFunc->getReturnType(); PHINode *PHI = nullptr; if (Returns.size() > 1) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. if (!CB.use_empty()) { PHI = PHINode::Create(RTy, Returns.size(), CB.getName()); PHI->insertBefore(AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. CB.replaceAllUsesWith(PHI); } // Loop over all of the return instructions adding entries to the PHI node // as appropriate. if (PHI) { for (ReturnInst *RI : Returns) { assert(RI->getReturnValue()->getType() == PHI->getType() && "Ret value not consistent in function!"); PHI->addIncoming(RI->getReturnValue(), RI->getParent()); } } // Add a branch to the merge points and remove return instructions. DebugLoc Loc; for (ReturnInst *RI : Returns) { BranchInst *BI = BranchInst::Create(AfterCallBB, RI->getIterator()); Loc = RI->getDebugLoc(); BI->setDebugLoc(Loc); RI->eraseFromParent(); } // We need to set the debug location to *somewhere* inside the // inlined function. The line number may be nonsensical, but the // instruction will at least be associated with the right // function. if (CreatedBranchToNormalDest) CreatedBranchToNormalDest->setDebugLoc(Loc); } else if (!Returns.empty()) { // Otherwise, if there is exactly one return value, just replace anything // using the return value of the call with the computed value. if (!CB.use_empty()) { if (&CB == Returns[0]->getReturnValue()) CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); else CB.replaceAllUsesWith(Returns[0]->getReturnValue()); } // Update PHI nodes that use the ReturnBB to use the AfterCallBB. BasicBlock *ReturnBB = Returns[0]->getParent(); ReturnBB->replaceAllUsesWith(AfterCallBB); // Splice the code from the return block into the block that it will return // to, which contains the code that was after the call. AfterCallBB->splice(AfterCallBB->begin(), ReturnBB); if (CreatedBranchToNormalDest) CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc()); // Delete the return instruction now and empty ReturnBB now. Returns[0]->eraseFromParent(); ReturnBB->eraseFromParent(); } else if (!CB.use_empty()) { // No returns, but something is using the return value of the call. Just // nuke the result. 
CB.replaceAllUsesWith(PoisonValue::get(CB.getType())); } // Since we are now done with the Call/Invoke, we can delete it. CB.eraseFromParent(); // If we inlined any musttail calls and the original return is now // unreachable, delete it. It can only contain a bitcast and ret. if (InlinedMustTailCalls && pred_empty(AfterCallBB)) AfterCallBB->eraseFromParent(); // We should always be able to fold the entry block of the function into the // single predecessor of the block... assert(cast(Br)->isUnconditional() && "splitBasicBlock broken!"); BasicBlock *CalleeEntry = cast(Br)->getSuccessor(0); // Splice the code entry block into calling block, right before the // unconditional branch. CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes OrigBB->splice(Br->getIterator(), CalleeEntry); // Remove the unconditional branch. Br->eraseFromParent(); // Now we can remove the CalleeEntry block, which is now empty. CalleeEntry->eraseFromParent(); // If we inserted a phi node, check to see if it has a single value (e.g. all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. if (PHI) { AssumptionCache *AC = IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr; auto &DL = Caller->getDataLayout(); if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) { PHI->replaceAllUsesWith(V); PHI->eraseFromParent(); } } if (MergeAttributes) AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc); return InlineResult::success(); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 122279160cc7..95bf9f06bc33 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -1,448 +1,450 @@ //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This family of functions perform manipulations on Modules. // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/xxhash.h" using namespace llvm; #define DEBUG_TYPE "moduleutils" static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F, int Priority, Constant *Data) { IRBuilder<> IRB(M.getContext()); FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); // Get the current set of static global constructors and add the new ctor // to the list. 
SmallVector CurrentCtors; StructType *EltTy; if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) { EltTy = cast(GVCtor->getValueType()->getArrayElementType()); if (Constant *Init = GVCtor->getInitializer()) { unsigned n = Init->getNumOperands(); CurrentCtors.reserve(n + 1); for (unsigned i = 0; i != n; ++i) CurrentCtors.push_back(cast(Init->getOperand(i))); } GVCtor->eraseFromParent(); } else { EltTy = StructType::get(IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()), IRB.getPtrTy()); } // Build a 3 field global_ctor entry. We don't take a comdat key. Constant *CSVals[3]; CSVals[0] = IRB.getInt32(Priority); CSVals[1] = F; CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy()) : Constant::getNullValue(IRB.getPtrTy()); Constant *RuntimeCtorInit = ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements())); CurrentCtors.push_back(RuntimeCtorInit); // Create a new initializer. ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size()); Constant *NewInit = ConstantArray::get(AT, CurrentCtors); // Create the new global variable and replace all uses of // the old global variable with the new one. (void)new GlobalVariable(M, NewInit->getType(), false, GlobalValue::AppendingLinkage, NewInit, ArrayName); } void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) { appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data); } void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) { appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data); } static void collectUsedGlobals(GlobalVariable *GV, SmallSetVector &Init) { if (!GV || !GV->hasInitializer()) return; auto *CA = cast(GV->getInitializer()); for (Use &Op : CA->operands()) Init.insert(cast(Op)); } static void appendToUsedList(Module &M, StringRef Name, ArrayRef Values) { GlobalVariable *GV = M.getGlobalVariable(Name); SmallSetVector Init; collectUsedGlobals(GV, Init); if (GV) GV->eraseFromParent(); Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext()); for (auto *V : Values) Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy)); if (Init.empty()) return; ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size()); GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, ConstantArray::get(ATy, Init.getArrayRef()), Name); GV->setSection("llvm.metadata"); } void llvm::appendToUsed(Module &M, ArrayRef Values) { appendToUsedList(M, "llvm.used", Values); } void llvm::appendToCompilerUsed(Module &M, ArrayRef Values) { appendToUsedList(M, "llvm.compiler.used", Values); } static void removeFromUsedList(Module &M, StringRef Name, function_ref ShouldRemove) { GlobalVariable *GV = M.getNamedGlobal(Name); if (!GV) return; SmallSetVector Init; collectUsedGlobals(GV, Init); Type *ArrayEltTy = cast(GV->getValueType())->getElementType(); SmallVector NewInit; for (Constant *MaybeRemoved : Init) { if (!ShouldRemove(MaybeRemoved->stripPointerCasts())) NewInit.push_back(MaybeRemoved); } if (!NewInit.empty()) { ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size()); GlobalVariable *NewGV = new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage, ConstantArray::get(ATy, NewInit), "", GV, GV->getThreadLocalMode(), GV->getAddressSpace()); NewGV->setSection(GV->getSection()); NewGV->takeName(GV); } GV->eraseFromParent(); } void llvm::removeFromUsedLists(Module &M, function_ref ShouldRemove) { removeFromUsedList(M, "llvm.used", ShouldRemove); removeFromUsedList(M, "llvm.compiler.used", ShouldRemove); } 
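// Editorial usage sketch (not part of the original file): how an
// instrumentation pass typically uses the helpers above to register a module
// constructor and keep it alive. The function name registerExampleCtor is
// hypothetical.
static void registerExampleCtor(Module &M, Function *Ctor) {
  // Append the ctor at the default priority with no associated data pointer.
  appendToGlobalCtors(M, Ctor, /*Priority=*/65535, /*Data=*/nullptr);
  // Pin the ctor in llvm.used so it survives even if nothing references it.
  appendToUsed(M, {Ctor});
}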
 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
   if (!M.getModuleFlag("kcfi"))
     return;
   // Matches CodeGenModule::CreateKCFITypeId in Clang.
   LLVMContext &Ctx = M.getContext();
   MDBuilder MDB(Ctx);
-  F.setMetadata(
-      LLVMContext::MD_kcfi_type,
-      MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
-                           Type::getInt32Ty(Ctx),
-                           static_cast<uint32_t>(xxHash64(MangledType))))));
+  std::string Type = MangledType.str();
+  if (M.getModuleFlag("cfi-normalize-integers"))
+    Type += ".normalized";
+  F.setMetadata(LLVMContext::MD_kcfi_type,
+                MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
+                                     Type::getInt32Ty(Ctx),
+                                     static_cast<uint32_t>(xxHash64(Type))))));
   // If the module was compiled with -fpatchable-function-entry, ensure
   // we use the same patchable-function-prefix.
   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
           M.getModuleFlag("kcfi-offset"))) {
     if (unsigned Offset = MD->getZExtValue())
       F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
   }
 }
 
 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
                                                   ArrayRef<Type *> InitArgTypes,
                                                   bool Weak) {
   assert(!InitName.empty() && "Expected init function name");
   auto *VoidTy = Type::getVoidTy(M.getContext());
   auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
   auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
   auto *Fn = cast<Function>(FnCallee.getCallee());
   if (Weak && Fn->isDeclaration())
     Fn->setLinkage(Function::ExternalWeakLinkage);
   return FnCallee;
 }
 
 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
   Function *Ctor = Function::createWithDefaultAttr(
       FunctionType::get(Type::getVoidTy(M.getContext()), false),
       GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
       CtorName, &M);
   Ctor->addFnAttr(Attribute::NoUnwind);
   setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
   BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
   ReturnInst::Create(M.getContext(), CtorBB);
   // Ensure Ctor cannot be discarded, even if in a comdat.
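#if 0 // Illustrative sketch only, not part of this change: the effect of the
      // setKCFIType() hunk above on the !kcfi_type id.  kcfiTypeId() is a
      // hypothetical helper that mirrors the new code.
static uint32_t kcfiTypeId(StringRef MangledType, bool NormalizeIntegers) {
  std::string Type = MangledType.str();
  if (NormalizeIntegers)    // module flag "cfi-normalize-integers" is present
    Type += ".normalized";  // e.g. "_ZTSFvvE" is hashed as "_ZTSFvvE.normalized"
  return static_cast<uint32_t>(xxHash64(Type));
}
#endif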
appendToUsed(M, {Ctor}); return Ctor; } std::pair llvm::createSanitizerCtorAndInitFunctions( Module &M, StringRef CtorName, StringRef InitName, ArrayRef InitArgTypes, ArrayRef InitArgs, StringRef VersionCheckName, bool Weak) { assert(!InitName.empty() && "Expected init function name"); assert(InitArgs.size() == InitArgTypes.size() && "Sanitizer's init function expects different number of arguments"); FunctionCallee InitFunction = declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak); Function *Ctor = createSanitizerCtor(M, CtorName); IRBuilder<> IRB(M.getContext()); BasicBlock *RetBB = &Ctor->getEntryBlock(); if (Weak) { RetBB->setName("ret"); auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB); auto *CallInitBB = BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB); auto *InitFn = cast(InitFunction.getCallee()); auto *InitFnPtr = PointerType::get(InitFn->getType(), InitFn->getAddressSpace()); IRB.SetInsertPoint(EntryBB); Value *InitNotNull = IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr)); IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB); IRB.SetInsertPoint(CallInitBB); } else { IRB.SetInsertPoint(RetBB->getTerminator()); } IRB.CreateCall(InitFunction, InitArgs); if (!VersionCheckName.empty()) { FunctionCallee VersionCheckFunction = M.getOrInsertFunction( VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), AttributeList()); IRB.CreateCall(VersionCheckFunction, {}); } if (Weak) IRB.CreateBr(RetBB); return std::make_pair(Ctor, InitFunction); } std::pair llvm::getOrCreateSanitizerCtorAndInitFunctions( Module &M, StringRef CtorName, StringRef InitName, ArrayRef InitArgTypes, ArrayRef InitArgs, function_ref FunctionsCreatedCallback, StringRef VersionCheckName, bool Weak) { assert(!CtorName.empty() && "Expected ctor function name"); if (Function *Ctor = M.getFunction(CtorName)) // FIXME: Sink this logic into the module, similar to the handling of // globals. This will make moving to a concurrent model much easier. if (Ctor->arg_empty() || Ctor->getReturnType() == Type::getVoidTy(M.getContext())) return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)}; Function *Ctor; FunctionCallee InitFunction; std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions( M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak); FunctionsCreatedCallback(Ctor, InitFunction); return std::make_pair(Ctor, InitFunction); } void llvm::filterDeadComdatFunctions( SmallVectorImpl &DeadComdatFunctions) { SmallPtrSet MaybeDeadFunctions; SmallPtrSet MaybeDeadComdats; for (Function *F : DeadComdatFunctions) { MaybeDeadFunctions.insert(F); if (Comdat *C = F->getComdat()) MaybeDeadComdats.insert(C); } // Find comdats for which all users are dead now. SmallPtrSet DeadComdats; for (Comdat *C : MaybeDeadComdats) { auto IsUserDead = [&](GlobalObject *GO) { auto *F = dyn_cast(GO); return F && MaybeDeadFunctions.contains(F); }; if (all_of(C->getUsers(), IsUserDead)) DeadComdats.insert(C); } // Only keep functions which have no comdat or a dead comdat. 
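#if 0 // Illustrative sketch only, not part of this change: how a sanitizer
      // pass typically drives getOrCreateSanitizerCtorAndInitFunctions()
      // above.  The "mysan" names and the priority are hypothetical.
static void buildMySanCtor(Module &M) {
  auto [Ctor, Init] = getOrCreateSanitizerCtorAndInitFunctions(
      M, "mysan.module_ctor", "__mysan_init",
      /*InitArgTypes=*/{}, /*InitArgs=*/{},
      // Invoked only when the ctor did not already exist in the module.
      [&](Function *CtorFn, FunctionCallee) {
        appendToGlobalCtors(M, CtorFn, /*Priority=*/0);
      });
  (void)Ctor;
  (void)Init;
}
#endif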
erase_if(DeadComdatFunctions, [&](Function *F) { Comdat *C = F->getComdat(); return C && !DeadComdats.contains(C); }); } std::string llvm::getUniqueModuleId(Module *M) { MD5 Md5; bool ExportsSymbols = false; auto AddGlobal = [&](GlobalValue &GV) { if (GV.isDeclaration() || GV.getName().starts_with("llvm.") || !GV.hasExternalLinkage() || GV.hasComdat()) return; ExportsSymbols = true; Md5.update(GV.getName()); Md5.update(ArrayRef{0}); }; for (auto &F : *M) AddGlobal(F); for (auto &GV : M->globals()) AddGlobal(GV); for (auto &GA : M->aliases()) AddGlobal(GA); for (auto &IF : M->ifuncs()) AddGlobal(IF); if (!ExportsSymbols) return ""; MD5::MD5Result R; Md5.final(R); SmallString<32> Str; MD5::stringifyResult(R, Str); return ("." + Str).str(); } void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf, StringRef SectionName, Align Alignment) { // Embed the memory buffer into the module. Constant *ModuleConstant = ConstantDataArray::get( M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize())); GlobalVariable *GV = new GlobalVariable( M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage, ModuleConstant, "llvm.embedded.object"); GV->setSection(SectionName); GV->setAlignment(Alignment); LLVMContext &Ctx = M.getContext(); NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects"); Metadata *MDVals[] = {ConstantAsMetadata::get(GV), MDString::get(Ctx, SectionName)}; MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {})); appendToCompilerUsed(M, GV); } bool llvm::lowerGlobalIFuncUsersAsGlobalCtor( Module &M, ArrayRef FilteredIFuncsToLower) { SmallVector AllIFuncs; ArrayRef IFuncsToLower = FilteredIFuncsToLower; if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs for (GlobalIFunc &GI : M.ifuncs()) AllIFuncs.push_back(&GI); IFuncsToLower = AllIFuncs; } bool UnhandledUsers = false; LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); PointerType *TableEntryTy = PointerType::get(Ctx, DL.getProgramAddressSpace()); ArrayType *FuncPtrTableTy = ArrayType::get(TableEntryTy, IFuncsToLower.size()); Align PtrAlign = DL.getABITypeAlign(TableEntryTy); // Create a global table of function pointers we'll initialize in a global // constructor. auto *FuncPtrTable = new GlobalVariable( M, FuncPtrTableTy, false, GlobalValue::InternalLinkage, PoisonValue::get(FuncPtrTableTy), "", nullptr, GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace()); FuncPtrTable->setAlignment(PtrAlign); // Create a function to initialize the function pointer table. Function *NewCtor = Function::Create( FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage, DL.getProgramAddressSpace(), "", &M); BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor); IRBuilder<> InitBuilder(BB); size_t TableIndex = 0; for (GlobalIFunc *GI : IFuncsToLower) { Function *ResolvedFunction = GI->getResolverFunction(); // We don't know what to pass to a resolver function taking arguments // // FIXME: Is this even valid? clang and gcc don't complain but this // probably should be invalid IR. We could just pass through undef. if (!std::empty(ResolvedFunction->getFunctionType()->params())) { LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function " << ResolvedFunction->getName() << " with parameters\n"); UnhandledUsers = true; continue; } // Initialize the function pointer table. 
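#if 0 // Illustrative sketch only, not part of this change: roughly what this
      // ifunc lowering amounts to at the source level.  The names are
      // hypothetical; the real transform rewrites IR, not C++.
static int fastImpl() { return 1; }
static int (*resolveImpl())() { return fastImpl; }  // the ifunc resolver
static int (*ImplTable[1])() = {nullptr};           // FuncPtrTable
__attribute__((constructor)) static void initImplTable() {
  ImplTable[0] = resolveImpl();                     // NewCtor stores the resolved pointer
}
static int callImpl() { return ImplTable[0](); }    // former ifunc user loads from the table
#endif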
CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction); Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy); Constant *GEP = cast(InitBuilder.CreateConstInBoundsGEP2_32( FuncPtrTableTy, FuncPtrTable, 0, TableIndex++)); InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign); // Update all users to load a pointer from the global table. for (User *User : make_early_inc_range(GI->users())) { Instruction *UserInst = dyn_cast(User); if (!UserInst) { // TODO: Should handle constantexpr casts in user instructions. Probably // can't do much about constant initializers. UnhandledUsers = true; continue; } IRBuilder<> UseBuilder(UserInst); LoadInst *ResolvedTarget = UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign); Value *ResolvedCast = UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType()); UserInst->replaceUsesOfWith(GI, ResolvedCast); } // If we handled all users, erase the ifunc. if (GI->use_empty()) GI->eraseFromParent(); } InitBuilder.CreateRetVoid(); PointerType *ConstantDataTy = PointerType::get(Ctx, 0); // TODO: Is this the right priority? Probably should be before any other // constructors? const int Priority = 10; appendToGlobalCtors(M, NewCtor, Priority, ConstantPointerNull::get(ConstantDataTy)); return UnhandledUsers; } diff --git a/lib/clang/include/VCSVersion.inc b/lib/clang/include/VCSVersion.inc index 5b4ac649986a..b514dee27f3a 100644 --- a/lib/clang/include/VCSVersion.inc +++ b/lib/clang/include/VCSVersion.inc @@ -1,8 +1,8 @@ -#define LLVM_REVISION "llvmorg-19.1.2-0-g7ba7d8e2f7b6" +#define LLVM_REVISION "llvmorg-19.1.3-0-gab51eccf88f5" #define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git" -#define CLANG_REVISION "llvmorg-19.1.2-0-g7ba7d8e2f7b6" +#define CLANG_REVISION "llvmorg-19.1.3-0-gab51eccf88f5" #define CLANG_REPOSITORY "https://github.com/llvm/llvm-project.git" -#define LLDB_REVISION "llvmorg-19.1.2-0-g7ba7d8e2f7b6" +#define LLDB_REVISION "llvmorg-19.1.3-0-gab51eccf88f5" #define LLDB_REPOSITORY "https://github.com/llvm/llvm-project.git" diff --git a/lib/clang/include/clang/Basic/Version.inc b/lib/clang/include/clang/Basic/Version.inc index 163d9b601b93..2795c08bec37 100644 --- a/lib/clang/include/clang/Basic/Version.inc +++ b/lib/clang/include/clang/Basic/Version.inc @@ -1,8 +1,8 @@ -#define CLANG_VERSION 19.1.2 -#define CLANG_VERSION_STRING "19.1.2" +#define CLANG_VERSION 19.1.3 +#define CLANG_VERSION_STRING "19.1.3" #define CLANG_VERSION_MAJOR 19 #define CLANG_VERSION_MAJOR_STRING "19" #define CLANG_VERSION_MINOR 1 -#define CLANG_VERSION_PATCHLEVEL 2 +#define CLANG_VERSION_PATCHLEVEL 3 #define CLANG_VENDOR "FreeBSD " diff --git a/lib/clang/include/lld/Common/Version.inc b/lib/clang/include/lld/Common/Version.inc index f207be812952..834cd39007ca 100644 --- a/lib/clang/include/lld/Common/Version.inc +++ b/lib/clang/include/lld/Common/Version.inc @@ -1,4 +1,4 @@ // Local identifier in __FreeBSD_version style #define LLD_FREEBSD_VERSION 1400007 -#define LLD_VERSION_STRING "19.1.2 (FreeBSD llvmorg-19.1.2-0-g7ba7d8e2f7b6-" __XSTRING(LLD_FREEBSD_VERSION) ")" +#define LLD_VERSION_STRING "19.1.3 (FreeBSD llvmorg-19.1.3-0-gab51eccf88f5-" __XSTRING(LLD_FREEBSD_VERSION) ")" diff --git a/lib/clang/include/lldb/Version/Version.inc b/lib/clang/include/lldb/Version/Version.inc index be36ec215810..1e614fc8a0cc 100644 --- a/lib/clang/include/lldb/Version/Version.inc +++ b/lib/clang/include/lldb/Version/Version.inc @@ -1,6 +1,6 @@ -#define LLDB_VERSION 19.1.2 -#define LLDB_VERSION_STRING "19.1.2" +#define LLDB_VERSION 19.1.3 +#define 
LLDB_VERSION_STRING "19.1.3" #define LLDB_VERSION_MAJOR 19 #define LLDB_VERSION_MINOR 1 -#define LLDB_VERSION_PATCH 2 +#define LLDB_VERSION_PATCH 3 /* #undef LLDB_FULL_VERSION_STRING */ diff --git a/lib/clang/include/llvm/Config/config.h b/lib/clang/include/llvm/Config/config.h index 2ad54b836743..972b3c894dac 100644 --- a/lib/clang/include/llvm/Config/config.h +++ b/lib/clang/include/llvm/Config/config.h @@ -1,372 +1,372 @@ #ifndef CONFIG_H #define CONFIG_H // Include this header only under the llvm source tree. // This is a private header. /* Exported configuration */ #include "llvm/Config/llvm-config.h" /* Bug report URL. */ #define BUG_REPORT_URL "https://bugs.freebsd.org/submit/" /* Define to 1 to enable backtraces, and to 0 otherwise. */ #define ENABLE_BACKTRACES 1 /* Define to 1 to enable crash overrides, and to 0 otherwise. */ #define ENABLE_CRASH_OVERRIDES 1 /* Define to 1 to enable crash memory dumps, and to 0 otherwise. */ #define LLVM_ENABLE_CRASH_DUMPS 0 /* Define to 1 to prefer forward slashes on Windows, and to 0 prefer backslashes. */ #define LLVM_WINDOWS_PREFER_FORWARD_SLASH 0 /* Define to 1 if you have the `backtrace' function. */ #define HAVE_BACKTRACE TRUE #define BACKTRACE_HEADER /* Define to 1 if you have the header file. */ /* #undef HAVE_CRASHREPORTERCLIENT_H */ /* can use __crashreporter_info__ */ #if defined(__APPLE__) #define HAVE_CRASHREPORTER_INFO 1 #else #define HAVE_CRASHREPORTER_INFO 0 #endif /* Define to 1 if you have the declaration of `arc4random', and to 0 if you don't. */ #define HAVE_DECL_ARC4RANDOM 1 /* Define to 1 if you have the declaration of `FE_ALL_EXCEPT', and to 0 if you don't. */ #define HAVE_DECL_FE_ALL_EXCEPT 1 /* Define to 1 if you have the declaration of `FE_INEXACT', and to 0 if you don't. */ #define HAVE_DECL_FE_INEXACT 1 /* Define to 1 if you have the declaration of `strerror_s', and to 0 if you don't. */ #define HAVE_DECL_STRERROR_S 0 /* Define to 1 if you have the header file. */ #define HAVE_DLFCN_H 1 /* Define if dlopen() is available on this platform. */ #define HAVE_DLOPEN 1 /* Define if dladdr() is available on this platform. */ #define HAVE_DLADDR 1 #if !defined(__arm__) || defined(__USING_SJLJ_EXCEPTIONS__) || defined(__ARM_DWARF_EH__) /* Define to 1 if we can register EH frames on this platform. */ #define HAVE_REGISTER_FRAME 1 /* Define to 1 if we can deregister EH frames on this platform. */ #define HAVE_DEREGISTER_FRAME 1 #endif // !arm || USING_SJLJ_EXCEPTIONS || ARM_DWARF_EH_ /* Define if __unw_add_dynamic_fde() is available on this platform. */ /* #undef HAVE_UNW_ADD_DYNAMIC_FDE */ /* Define to 1 if you have the header file. */ #define HAVE_ERRNO_H 1 /* Define to 1 if you have the header file. */ #define HAVE_FCNTL_H 1 /* Define to 1 if you have the header file. */ #define HAVE_FENV_H 1 /* Define if libffi is available on this platform. */ /* #undef HAVE_FFI_CALL */ /* Define to 1 if you have the header file. */ /* #undef HAVE_FFI_FFI_H */ /* Define to 1 if you have the header file. */ /* #undef HAVE_FFI_H */ /* Define to 1 if you have the `futimens' function. */ #define HAVE_FUTIMENS 1 /* Define to 1 if you have the `futimes' function. */ #define HAVE_FUTIMES 1 /* Define to 1 if you have the `getpagesize' function. */ #define HAVE_GETPAGESIZE 1 /* Define to 1 if you have the `getrlimit' function. */ #define HAVE_GETRLIMIT 1 /* Define to 1 if you have the `getrusage' function. */ #define HAVE_GETRUSAGE 1 /* Define to 1 if you have the `isatty' function. 
*/ #define HAVE_ISATTY 1 /* Define to 1 if you have the `edit' library (-ledit). */ #define HAVE_LIBEDIT TRUE /* Define to 1 if you have the `pfm' library (-lpfm). */ /* #undef HAVE_LIBPFM */ /* Define to 1 if the `perf_branch_entry' struct has field cycles. */ /* #undef LIBPFM_HAS_FIELD_CYCLES */ /* Define to 1 if you have the `psapi' library (-lpsapi). */ /* #undef HAVE_LIBPSAPI */ /* Define to 1 if you have the `pthread' library (-lpthread). */ #define HAVE_LIBPTHREAD 1 /* Define to 1 if you have the `pthread_getname_np' function. */ #define HAVE_PTHREAD_GETNAME_NP 1 /* Define to 1 if you have the `pthread_setname_np' function. */ #define HAVE_PTHREAD_SETNAME_NP 1 /* Define to 1 if you have the header file. */ #if __has_include() #define HAVE_LINK_H 1 #else #define HAVE_LINK_H 0 #endif /* Define to 1 if you have the header file. */ #if __has_include() #define HAVE_MACH_MACH_H 1 #endif /* Define to 1 if you have the `mallctl' function. */ #if defined(__FreeBSD__) #define HAVE_MALLCTL 1 #endif /* Define to 1 if you have the `mallinfo' function. */ #if defined(__linux__) #define HAVE_MALLINFO 1 #endif /* Define to 1 if you have the `mallinfo2' function. */ /* #undef HAVE_MALLINFO2 */ /* Define to 1 if you have the header file. */ #if __has_include() #define HAVE_MALLOC_MALLOC_H 1 #endif /* Define to 1 if you have the `malloc_zone_statistics' function. */ #if defined(__APPLE__) #define HAVE_MALLOC_ZONE_STATISTICS 1 #endif /* Define to 1 if you have the `posix_spawn' function. */ #define HAVE_POSIX_SPAWN 1 /* Define to 1 if you have the `pread' function. */ #define HAVE_PREAD 1 /* Define to 1 if you have the header file. */ #define HAVE_PTHREAD_H 1 /* Have pthread_mutex_lock */ #define HAVE_PTHREAD_MUTEX_LOCK 1 /* Have pthread_rwlock_init */ #define HAVE_PTHREAD_RWLOCK_INIT 1 /* Define to 1 if you have the `sbrk' function. */ #define HAVE_SBRK 1 /* Define to 1 if you have the `setenv' function. */ #define HAVE_SETENV 1 /* Define to 1 if you have the `setrlimit' function. */ #define HAVE_SETRLIMIT 1 /* Define to 1 if you have the `sigaltstack' function. */ #define HAVE_SIGALTSTACK 1 /* Define to 1 if you have the header file. */ #define HAVE_SIGNAL_H 1 /* Define to 1 if you have the `strerror_r' function. */ #define HAVE_STRERROR_R 1 /* Define to 1 if you have the `sysconf' function. */ #define HAVE_SYSCONF 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_IOCTL_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_MMAN_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_PARAM_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_RESOURCE_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_STAT_H 1 /* Define to 1 if you have the header file. */ #define HAVE_SYS_TIME_H 1 /* Define to 1 if stat struct has st_mtimespec member .*/ #if !defined(__linux__) #define HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC 1 #endif /* Define to 1 if stat struct has st_mtim member. */ #if !defined(__APPLE__) #define HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC 1 #endif /* Define to 1 if you have the header file. */ #define HAVE_SYS_TYPES_H 1 /* Define to 1 if you have the header file. */ #define HAVE_TERMIOS_H 1 /* Define to 1 if you have the header file. */ #define HAVE_UNISTD_H 1 /* Define to 1 if you have the header file. */ /* #undef HAVE_VALGRIND_VALGRIND_H */ /* Have host's _alloca */ /* #undef HAVE__ALLOCA */ /* Define to 1 if you have the `_chsize_s' function. */ /* #undef HAVE__CHSIZE_S */ /* Define to 1 if you have the `_Unwind_Backtrace' function. 
*/ #define HAVE__UNWIND_BACKTRACE 1 /* Have host's __alloca */ /* #undef HAVE___ALLOCA */ /* Have host's __ashldi3 */ /* #undef HAVE___ASHLDI3 */ /* Have host's __ashrdi3 */ /* #undef HAVE___ASHRDI3 */ /* Have host's __chkstk */ /* #undef HAVE___CHKSTK */ /* Have host's __chkstk_ms */ /* #undef HAVE___CHKSTK_MS */ /* Have host's __cmpdi2 */ /* #undef HAVE___CMPDI2 */ /* Have host's __divdi3 */ /* #undef HAVE___DIVDI3 */ /* Have host's __fixdfdi */ /* #undef HAVE___FIXDFDI */ /* Have host's __fixsfdi */ /* #undef HAVE___FIXSFDI */ /* Have host's __floatdidf */ /* #undef HAVE___FLOATDIDF */ /* Have host's __lshrdi3 */ /* #undef HAVE___LSHRDI3 */ /* Have host's __main */ /* #undef HAVE___MAIN */ /* Have host's __moddi3 */ /* #undef HAVE___MODDI3 */ /* Have host's __udivdi3 */ /* #undef HAVE___UDIVDI3 */ /* Have host's __umoddi3 */ /* #undef HAVE___UMODDI3 */ /* Have host's ___chkstk */ /* #undef HAVE____CHKSTK */ /* Have host's ___chkstk_ms */ /* #undef HAVE____CHKSTK_MS */ /* Linker version detected at compile time. */ /* #undef HOST_LINK_VERSION */ /* Define if overriding target triple is enabled */ /* #undef LLVM_TARGET_TRIPLE_ENV */ /* Whether tools show host and target info when invoked with --version */ #define LLVM_VERSION_PRINTER_SHOW_HOST_TARGET_INFO 1 /* Whether tools show optional build config flags when invoked with --version */ #define LLVM_VERSION_PRINTER_SHOW_BUILD_CONFIG 1 /* Define if libxml2 is supported on this platform. */ /* #undef LLVM_ENABLE_LIBXML2 */ /* Define to the extension used for shared libraries, say, ".so". */ #if defined(__APPLE__) #define LTDL_SHLIB_EXT ".dylib" #else #define LTDL_SHLIB_EXT ".so" #endif /* Define to the extension used for plugin libraries, say, ".so". */ #if defined(__APPLE__) #define LLVM_PLUGIN_EXT ".dylib" #else #define LLVM_PLUGIN_EXT ".so" #endif /* Define to the address where bug reports for this package should be sent. */ #define PACKAGE_BUGREPORT "https://bugs.freebsd.org/submit/" /* Define to the full name of this package. */ #define PACKAGE_NAME "LLVM" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "LLVM 19.1.2" +#define PACKAGE_STRING "LLVM 19.1.3" /* Define to the version of this package. */ -#define PACKAGE_VERSION "19.1.2" +#define PACKAGE_VERSION "19.1.3" /* Define to the vendor of this package. */ /* #undef PACKAGE_VENDOR */ /* Define to a function implementing stricmp */ /* #undef stricmp */ /* Define to a function implementing strdup */ /* #undef strdup */ /* Whether GlobalISel rule coverage is being collected */ #define LLVM_GISEL_COV_ENABLED 0 /* Define to the default GlobalISel coverage file prefix */ /* #undef LLVM_GISEL_COV_PREFIX */ /* Whether Timers signpost passes in Xcode Instruments */ #if defined(__APPLE__) #define LLVM_SUPPORT_XCODE_SIGNPOSTS 1 #else #define LLVM_SUPPORT_XCODE_SIGNPOSTS 0 #endif /* #undef HAVE_PROC_PID_RUSAGE */ #define HAVE_BUILTIN_THREAD_POINTER 1 #endif diff --git a/lib/clang/include/llvm/Config/llvm-config.h b/lib/clang/include/llvm/Config/llvm-config.h index 0d8253e78646..c3a1df747ff6 100644 --- a/lib/clang/include/llvm/Config/llvm-config.h +++ b/lib/clang/include/llvm/Config/llvm-config.h @@ -1,232 +1,232 @@ /*===------- llvm/Config/llvm-config.h - llvm configuration -------*- C -*-===*/ /* */ /* Part of the LLVM Project, under the Apache License v2.0 with LLVM */ /* Exceptions. */ /* See https://llvm.org/LICENSE.txt for license information. 
*/ /* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ /* */ /*===----------------------------------------------------------------------===*/ /* This file enumerates variables from the LLVM configuration so that they can be in exported headers and won't override package specific directives. This is a C header that can be included in the llvm-c headers. */ #ifndef LLVM_CONFIG_H #define LLVM_CONFIG_H /* Define if LLVM_ENABLE_DUMP is enabled */ /* #undef LLVM_ENABLE_DUMP */ /* Target triple LLVM will generate code for by default */ /* Doesn't use `cmakedefine` because it is allowed to be empty. */ /* #undef LLVM_DEFAULT_TARGET_TRIPLE */ /* Define if threads enabled */ #define LLVM_ENABLE_THREADS 1 /* Has gcc/MSVC atomic intrinsics */ #define LLVM_HAS_ATOMICS 1 /* Host triple LLVM will be executed on */ /* #undef LLVM_HOST_TRIPLE */ /* LLVM architecture name for the native architecture, if available */ /* #undef LLVM_NATIVE_ARCH */ /* LLVM name for the native AsmParser init function, if available */ /* #undef LLVM_NATIVE_ASMPARSER */ /* LLVM name for the native AsmPrinter init function, if available */ /* #undef LLVM_NATIVE_ASMPRINTER */ /* LLVM name for the native Disassembler init function, if available */ /* #undef LLVM_NATIVE_DISASSEMBLER */ /* LLVM name for the native Target init function, if available */ /* #undef LLVM_NATIVE_TARGET */ /* LLVM name for the native TargetInfo init function, if available */ /* #undef LLVM_NATIVE_TARGETINFO */ /* LLVM name for the native target MC init function, if available */ /* #undef LLVM_NATIVE_TARGETMC */ /* LLVM name for the native target MCA init function, if available */ /* #undef LLVM_NATIVE_TARGETMCA */ /* Define if the AArch64 target is built in */ #ifdef LLVM_TARGET_ENABLE_AARCH64 #define LLVM_HAS_AARCH64_TARGET 1 #else #define LLVM_HAS_AARCH64_TARGET 0 #endif /* Define if the AMDGPU target is built in */ #define LLVM_HAS_AMDGPU_TARGET 0 /* Define if the ARC target is built in */ #define LLVM_HAS_ARC_TARGET 0 /* Define if the ARM target is built in */ #ifdef LLVM_TARGET_ENABLE_ARM #define LLVM_HAS_ARM_TARGET 1 #else #define LLVM_HAS_ARM_TARGET 0 #endif /* Define if the AVR target is built in */ #define LLVM_HAS_AVR_TARGET 0 /* Define if the BPF target is built in */ #ifdef LLVM_TARGET_ENABLE_BPF #define LLVM_HAS_BPF_TARGET 1 #else #define LLVM_HAS_BPF_TARGET 0 #endif /* Define if the CSKY target is built in */ #define LLVM_HAS_CSKY_TARGET 0 /* Define if the DirectX target is built in */ #define LLVM_HAS_DIRECTX_TARGET 0 /* Define if the Hexagon target is built in */ #define LLVM_HAS_HEXAGON_TARGET 0 /* Define if the Lanai target is built in */ #define LLVM_HAS_LANAI_TARGET 0 /* Define if the LoongArch target is built in */ #define LLVM_HAS_LOONGARCH_TARGET 0 /* Define if the M68k target is built in */ #define LLVM_HAS_M68K_TARGET 0 /* Define if the Mips target is built in */ #ifdef LLVM_TARGET_ENABLE_MIPS #define LLVM_HAS_MIPS_TARGET 1 #else #define LLVM_HAS_MIPS_TARGET 0 #endif /* Define if the MSP430 target is built in */ #define LLVM_HAS_MSP430_TARGET 0 /* Define if the NVPTX target is built in */ #define LLVM_HAS_NVPTX_TARGET 0 /* Define if the PowerPC target is built in */ #ifdef LLVM_TARGET_ENABLE_POWERPC #define LLVM_HAS_POWERPC_TARGET 1 #else #define LLVM_HAS_POWERPC_TARGET 0 #endif /* Define if the RISCV target is built in */ #ifdef LLVM_TARGET_ENABLE_RISCV #define LLVM_HAS_RISCV_TARGET 1 #else #define LLVM_HAS_RISCV_TARGET 0 #endif /* Define if the Sparc target is built in */ #define LLVM_HAS_SPARC_TARGET 0 /* Define if the 
SPIRV target is built in */ #define LLVM_HAS_SPIRV_TARGET 0 /* Define if the SystemZ target is built in */ #define LLVM_HAS_SYSTEMZ_TARGET 0 /* Define if the VE target is built in */ #define LLVM_HAS_VE_TARGET 0 /* Define if the WebAssembly target is built in */ #define LLVM_HAS_WEBASSEMBLY_TARGET 0 /* Define if the X86 target is built in */ #ifdef LLVM_TARGET_ENABLE_X86 #define LLVM_HAS_X86_TARGET 1 #else #define LLVM_HAS_X86_TARGET 0 #endif /* Define if the XCore target is built in */ #define LLVM_HAS_XCORE_TARGET 0 /* Define if the Xtensa target is built in */ #define LLVM_HAS_XTENSA_TARGET 0 /* Define if this is Unixish platform */ #define LLVM_ON_UNIX 1 /* Define if we have the Intel JIT API runtime support library */ #define LLVM_USE_INTEL_JITEVENTS 0 /* Define if we have the oprofile JIT-support library */ #define LLVM_USE_OPROFILE 0 /* Define if we have the perf JIT-support library */ #define LLVM_USE_PERF 0 /* Major version of the LLVM API */ #define LLVM_VERSION_MAJOR 19 /* Minor version of the LLVM API */ #define LLVM_VERSION_MINOR 1 /* Patch version of the LLVM API */ -#define LLVM_VERSION_PATCH 2 +#define LLVM_VERSION_PATCH 3 /* LLVM version string */ -#define LLVM_VERSION_STRING "19.1.2" +#define LLVM_VERSION_STRING "19.1.3" /* Whether LLVM records statistics for use with GetStatistics(), * PrintStatistics() or PrintStatisticsJSON() */ #define LLVM_FORCE_ENABLE_STATS 0 /* Define if we have z3 and want to build it */ /* #undef LLVM_WITH_Z3 */ /* Define if we have curl and want to use it */ /* #undef LLVM_ENABLE_CURL */ /* Define if we have cpp-httplib and want to use it */ /* #undef LLVM_ENABLE_HTTPLIB */ /* Define if zlib compression is available */ #define LLVM_ENABLE_ZLIB 1 /* Define if zstd compression is available */ #define LLVM_ENABLE_ZSTD 1 /* Define if LLVM is using tflite */ /* #undef LLVM_HAVE_TFLITE */ /* Define to 1 if you have the header file. */ #define HAVE_SYSEXITS_H 1 /* Define if building libLLVM shared library */ /* #undef LLVM_BUILD_LLVM_DYLIB */ /* Define if building LLVM with BUILD_SHARED_LIBS */ /* #undef LLVM_BUILD_SHARED_LIBS */ /* Define if building LLVM with LLVM_FORCE_USE_OLD_TOOLCHAIN_LIBS */ /* #undef LLVM_FORCE_USE_OLD_TOOLCHAIN */ /* Define if llvm_unreachable should be optimized with undefined behavior * in non assert builds */ #define LLVM_UNREACHABLE_OPTIMIZE 1 /* Define to 1 if you have the DIA SDK installed, and to 0 if you don't. */ #define LLVM_ENABLE_DIA_SDK 0 /* Define if plugins enabled */ /* #undef LLVM_ENABLE_PLUGINS */ /* Define if logf128 is available */ /* #undef LLVM_HAS_LOGF128 */ #endif diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h index a9cdf75b6b13..361bd66edf2f 100644 --- a/lib/clang/include/llvm/Support/VCSRevision.h +++ b/lib/clang/include/llvm/Support/VCSRevision.h @@ -1,2 +1,2 @@ -#define LLVM_REVISION "llvmorg-19.1.2-0-g7ba7d8e2f7b6" +#define LLVM_REVISION "llvmorg-19.1.3-0-gab51eccf88f5" #define LLVM_REPOSITORY "https://github.com/llvm/llvm-project.git" diff --git a/lib/libc++/__assertion_handler b/lib/libc++/__assertion_handler index 3b6d6b2cca53..e12ccccdaff3 100644 --- a/lib/libc++/__assertion_handler +++ b/lib/libc++/__assertion_handler @@ -1,40 +1,41 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _LIBCPP___ASSERTION_HANDLER #define _LIBCPP___ASSERTION_HANDLER #include <__config> #include <__verbose_abort> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif #if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_ASSERTION_HANDLER(message) _LIBCPP_VERBOSE_ABORT("%s", message) #else # if __has_builtin(__builtin_verbose_trap) // AppleClang shipped a slightly different version of __builtin_verbose_trap from the upstream // version before upstream Clang actually got the builtin. -# if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 17000 +// TODO: Remove once AppleClang supports the two-arguments version of the builtin. +# if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1700 # define _LIBCPP_ASSERTION_HANDLER(message) __builtin_verbose_trap(message) # else # define _LIBCPP_ASSERTION_HANDLER(message) __builtin_verbose_trap("libc++", message) # endif # else # define _LIBCPP_ASSERTION_HANDLER(message) ((void)message, __builtin_trap()) # endif #endif // _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG #endif // _LIBCPP___ASSERTION_HANDLER diff --git a/lib/libomp/kmp_config.h b/lib/libomp/kmp_config.h index 3a8f726353bf..9caf4b8cdf44 100644 --- a/lib/libomp/kmp_config.h +++ b/lib/libomp/kmp_config.h @@ -1,196 +1,196 @@ /* * kmp_config.h -- Feature macros */ //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef KMP_CONFIG_H #define KMP_CONFIG_H #include "kmp_platform.h" // cmakedefine01 MACRO will define MACRO as either 0 or 1 // cmakedefine MACRO 1 will define MACRO as 1 or leave undefined #define DEBUG_BUILD 0 #define RELWITHDEBINFO_BUILD 0 #define LIBOMP_USE_ITT_NOTIFY 1 #define USE_ITT_NOTIFY LIBOMP_USE_ITT_NOTIFY #if ! 
LIBOMP_USE_ITT_NOTIFY # define INTEL_NO_ITTNOTIFY_API #endif #define LIBOMP_USE_VERSION_SYMBOLS 1 #if LIBOMP_USE_VERSION_SYMBOLS # define KMP_USE_VERSION_SYMBOLS #endif #define LIBOMP_HAVE_WEAK_ATTRIBUTE 1 #define KMP_HAVE_WEAK_ATTRIBUTE LIBOMP_HAVE_WEAK_ATTRIBUTE #define LIBOMP_HAVE_PSAPI 0 #define KMP_HAVE_PSAPI LIBOMP_HAVE_PSAPI #define LIBOMP_STATS 0 #define KMP_STATS_ENABLED LIBOMP_STATS #define LIBOMP_HAVE_X86INTRIN_H 0 #define KMP_HAVE_X86INTRIN_H LIBOMP_HAVE_X86INTRIN_H #define LIBOMP_HAVE___BUILTIN_READCYCLECOUNTER 0 #define KMP_HAVE___BUILTIN_READCYCLECOUNTER LIBOMP_HAVE___BUILTIN_READCYCLECOUNTER #define LIBOMP_HAVE___RDTSC 0 #define KMP_HAVE___RDTSC LIBOMP_HAVE___RDTSC #define LIBOMP_USE_DEBUGGER 0 #define USE_DEBUGGER LIBOMP_USE_DEBUGGER #define LIBOMP_OMPT_DEBUG 0 #define OMPT_DEBUG LIBOMP_OMPT_DEBUG #define LIBOMP_OMPT_SUPPORT 1 #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT #define LIBOMP_OMPD_SUPPORT 0 #define OMPD_SUPPORT LIBOMP_OMPD_SUPPORT #define LIBOMP_OMPX_TASKGRAPH 0 #define OMPX_TASKGRAPH LIBOMP_OMPX_TASKGRAPH #define LIBOMP_PROFILING_SUPPORT 0 #define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT #define LIBOMP_OMPT_OPTIONAL 1 #define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // FreeBSD customization { #define LIBOMP_USE_ADAPTIVE_LOCKS 1 #else #define LIBOMP_USE_ADAPTIVE_LOCKS 0 #endif // } FreeBSD customization #define KMP_USE_ADAPTIVE_LOCKS LIBOMP_USE_ADAPTIVE_LOCKS #define KMP_DEBUG_ADAPTIVE_LOCKS 0 #define LIBOMP_USE_INTERNODE_ALIGNMENT 0 #define KMP_USE_INTERNODE_ALIGNMENT LIBOMP_USE_INTERNODE_ALIGNMENT #define LIBOMP_ENABLE_ASSERTIONS 1 #define KMP_USE_ASSERT LIBOMP_ENABLE_ASSERTIONS #define LIBOMP_USE_HIER_SCHED 0 #define KMP_USE_HIER_SCHED LIBOMP_USE_HIER_SCHED #define STUBS_LIBRARY 0 #define LIBOMP_USE_HWLOC 0 #define KMP_USE_HWLOC LIBOMP_USE_HWLOC #define LIBOMP_ENABLE_SHARED 1 #define KMP_DYNAMIC_LIB LIBOMP_ENABLE_SHARED #if KMP_ARCH_X86 // FreeBSD customization { #define KMP_ARCH_STR "IA-32" #elif KMP_ARCH_X86_64 #define KMP_ARCH_STR "Intel(R) 64" #elif KMP_ARCH_AARCH64 #define KMP_ARCH_STR "AARCH64" #elif KMP_ARCH_PPC64 #define KMP_ARCH_STR "PPC64BE" #elif KMP_ARCH_MIPS #define KMP_ARCH_STR "MIPS" #elif KMP_ARCH_MIPS64 #define KMP_ARCH_STR "MIPS64" #elif KMP_ARCH_RISCV64 #define KMP_ARCH_STR "RISCV64" #else #error Unknown KMP_ARCH value #endif // } FreeBSD customization #define KMP_LIBRARY_FILE "libomp.so" #define KMP_VERSION_MAJOR 5 #define KMP_VERSION_MINOR 0 #define MSVC 0 #define KMP_MSVC_COMPAT MSVC #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // FreeBSD customization { #define LIBOMP_HAVE_WAITPKG_INTRINSICS 1 #else #define LIBOMP_HAVE_WAITPKG_INTRINSICS 0 #endif // } FreeBSD customization #define KMP_HAVE_WAITPKG_INTRINSICS LIBOMP_HAVE_WAITPKG_INTRINSICS #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // FreeBSD customization { #define LIBOMP_HAVE_RTM_INTRINSICS 1 #else #define LIBOMP_HAVE_RTM_INTRINSICS 0 #endif // } FreeBSD customization #define KMP_HAVE_RTM_INTRINSICS LIBOMP_HAVE_RTM_INTRINSICS #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // FreeBSD customization { #define LIBOMP_HAVE_IMMINTRIN_H 1 #else #define LIBOMP_HAVE_IMMINTRIN_H 0 #endif // } FreeBSD customization #define KMP_HAVE_IMMINTRIN_H LIBOMP_HAVE_IMMINTRIN_H #define LIBOMP_HAVE_INTRIN_H 0 #define KMP_HAVE_INTRIN_H LIBOMP_HAVE_INTRIN_H #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // FreeBSD customization { #define LIBOMP_HAVE_ATTRIBUTE_WAITPKG 1 #else #define LIBOMP_HAVE_ATTRIBUTE_WAITPKG 0 #endif // } FreeBSD customization #define KMP_HAVE_ATTRIBUTE_WAITPKG LIBOMP_HAVE_ATTRIBUTE_WAITPKG #if 
KMP_ARCH_X86 || KMP_ARCH_X86_64 // FreeBSD customization { #define LIBOMP_HAVE_ATTRIBUTE_RTM 1 #else #define LIBOMP_HAVE_ATTRIBUTE_RTM 0 #endif // } FreeBSD customization #define KMP_HAVE_ATTRIBUTE_RTM LIBOMP_HAVE_ATTRIBUTE_RTM #define LIBOMP_ARCH_AARCH64_A64FX 0 #define KMP_ARCH_AARCH64_A64FX LIBOMP_ARCH_AARCH64_A64FX #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // FreeBSD customization { #define LIBOMP_HAVE_XMMINTRIN_H 1 #else #define LIBOMP_HAVE_XMMINTRIN_H 0 #endif // } FreeBSD customization #define KMP_HAVE_XMMINTRIN_H LIBOMP_HAVE_XMMINTRIN_H #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // FreeBSD customization { #define LIBOMP_HAVE__MM_MALLOC 1 #else #define LIBOMP_HAVE__MM_MALLOC 0 #endif // } FreeBSD customization #define KMP_HAVE__MM_MALLOC LIBOMP_HAVE__MM_MALLOC #define LIBOMP_HAVE_ALIGNED_ALLOC 1 #define KMP_HAVE_ALIGNED_ALLOC LIBOMP_HAVE_ALIGNED_ALLOC #define LIBOMP_HAVE_POSIX_MEMALIGN 1 #define KMP_HAVE_POSIX_MEMALIGN LIBOMP_HAVE_POSIX_MEMALIGN #define LIBOMP_HAVE__ALIGNED_MALLOC 0 #define KMP_HAVE__ALIGNED_MALLOC LIBOMP_HAVE__ALIGNED_MALLOC #define OPENMP_ENABLE_LIBOMPTARGET 0 #define ENABLE_LIBOMPTARGET OPENMP_ENABLE_LIBOMPTARGET // Configured cache line based on architecture -#if KMP_ARCH_PPC64 +#if KMP_ARCH_PPC64 || KMP_ARCH_PPC # define CACHE_LINE 128 #elif KMP_ARCH_AARCH64_A64FX # define CACHE_LINE 256 #elif KMP_ARCH_S390X # define CACHE_LINE 256 #else # define CACHE_LINE 64 #endif #if ! KMP_32_BIT_ARCH # define BUILD_I8 1 #endif #define KMP_NESTED_HOT_TEAMS 1 #define KMP_ADJUST_BLOCKTIME 1 #define BUILD_PARALLEL_ORDERED 1 #define KMP_ASM_INTRINS 1 #define USE_ITT_BUILD LIBOMP_USE_ITT_NOTIFY #define INTEL_ITTNOTIFY_PREFIX __kmp_itt_ #if ! KMP_MIC # define USE_LOAD_BALANCE 1 #endif #if ! (KMP_OS_WINDOWS || KMP_OS_DARWIN) # define KMP_TDATA_GTID 1 #endif #if STUBS_LIBRARY # define KMP_STUB 1 #endif #if DEBUG_BUILD || RELWITHDEBINFO_BUILD # define KMP_DEBUG 1 #endif #if KMP_OS_WINDOWS # define KMP_WIN_CDECL #else # define BUILD_TV # define KMP_GOMP_COMPAT #endif // use shared memory with dynamic library (except Android, where shm_* // functions don't exist). #if KMP_OS_UNIX && KMP_DYNAMIC_LIB && !__ANDROID__ #define KMP_USE_SHM #endif #endif // KMP_CONFIG_H
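// Illustrative sketch only, not part of this change: how a CACHE_LINE value
// such as the 128 bytes now selected for KMP_ARCH_PPC is typically used to
// pad and align hot shared state against false sharing.  The struct is
// hypothetical, not a libomp declaration.
#include <atomic>
#ifndef CACHE_LINE
#define CACHE_LINE 64
#endif
struct alignas(CACHE_LINE) PaddedCounter {
  std::atomic<long> Value{0};
  char Padding[CACHE_LINE - sizeof(std::atomic<long>)];
};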